> ## Documentation Index
> Fetch the complete documentation index at: https://developer.upsun.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Up(sun) and ready with Pandoc

export const PostMeta = ({data = {}}) => {
  const {author, date, image} = data;
  const authors = Array.isArray(author) ? author : author ? [author] : [];
  const resolveAuthor = slug => {
    const entry = AUTHOR_MAP[slug] || ({});
    const name = entry.name || slug;
    const github = entry.github || null;
    const linkedin = entry.linkedin || null;
    const url = github ? `https://github.com/${github}` : linkedin || null;
    const avatarUrl = github ? `https://github.com/${github}.png?size=64` : null;
    return {
      name,
      url,
      avatarUrl
    };
  };
  const formattedDate = date ? new Date(date).toLocaleDateString('en-US', {
    year: 'numeric',
    month: 'long',
    day: 'numeric'
  }) : null;
  if (!image && authors.length === 0 && !formattedDate) return null;
  const AUTHOR_MAP = {
    "aaron-collier": {
      "name": "Aaron Collier"
    },
    "aaron-dudenhofer": {
      "name": "Aaron Dudenhofer"
    },
    "aaron-porter": {
      "name": "Aaron Porter"
    },
    "adriaan-odendaal": {
      "name": "Adriaan Odendaal"
    },
    "ajmal": {
      "name": "Ajmal Siddiqui"
    },
    "akalipetis": {
      "name": "Antonis Kalipetis"
    },
    "alexander-varwijk": {
      "name": "Alexander Varwijk"
    },
    "alicia-bevilacqua": {
      "name": "Alicia Bevilacqua"
    },
    "amelie-deguerry": {
      "name": "Amelie Deguerry"
    },
    "anacidre": {
      "name": "Ana Cidre",
      "linkedin": "https://www.linkedin.com/in/ana-cidre"
    },
    "andoni": {
      "name": "Andoni Auzmendi"
    },
    "andrei-taranu": {
      "name": "Andrei (Alex) Taranu",
      "linkedin": "https://www.linkedin.com/in/andrei-alex-taranu/"
    },
    "andrew-baxter": {
      "name": "Andrew Baxter"
    },
    "andrew-melck": {
      "name": "Andrew Melck"
    },
    "antoine-crochet-damais": {
      "name": "Antoine Crochet Damais"
    },
    "augustin-delaporte": {
      "name": "Augustin Delaporte",
      "linkedin": "https://www.linkedin.com/in/augustindelaporte/"
    },
    "branislav-bujisic": {
      "name": "Branislav Bujisic"
    },
    "carl-smith": {
      "name": "Carl Smith"
    },
    "caroline-leroy": {
      "name": "Caroline Leroy"
    },
    "cati-mayer": {
      "name": "Cati Mayer"
    },
    "catplat": {
      "name": "C Trinkwon"
    },
    "ceelolulu": {
      "name": "Celeste van der Watt"
    },
    "chadwcarlson": {
      "name": "Chad Carlson",
      "github": "chadwcarlson",
      "linkedin": "https://www.linkedin.com/in/chadwcarlson"
    },
    "chris-ward": {
      "name": "Chris Ward"
    },
    "chris-yates": {
      "name": "Chris Yates"
    },
    "christian-sieber": {
      "name": "Christian Sieber"
    },
    "christopher-lockheardt": {
      "name": "Christopher Lockheardt"
    },
    "christopher-skene": {
      "name": "Christopher Skene"
    },
    "chuck-morgan": {
      "name": "Chuck Morgan"
    },
    "corey-dockendorf": {
      "name": "Corey Dockendorf"
    },
    "crell": {
      "name": "Crell"
    },
    "damz": {
      "name": "Damz"
    },
    "dan-morrison": {
      "name": "Dan Morrison"
    },
    "davidbonachera": {
      "name": "David Bonachera",
      "github": "davidbonachera",
      "linkedin": "https://www.linkedin.com/in/davidbonachera"
    },
    "dereliahmet1": {
      "name": "Ahmet Faruk Dereli"
    },
    "devicezero": {
      "name": "Jonas Kröger",
      "github": "devicezero",
      "linkedin": "https://www.linkedin.com/in/jonaskroeger/"
    },
    "doug-goldberg": {
      "name": "Doug Goldberg"
    },
    "duncan-naves": {
      "name": "Duncan Naves",
      "github": "duncannaves",
      "linkedin": "https://www.linkedin.com/in/duncan-naves-a94423aa"
    },
    "erika-bustamante": {
      "name": "Erika Bustamante"
    },
    "fabpot": {
      "name": "Fabien Potencier"
    },
    "flovntp": {
      "name": "Florent Huck",
      "github": "flovntp",
      "linkedin": "https://www.linkedin.com/in/florenthuck"
    },
    "fred-plais": {
      "name": "Fred Plais"
    },
    "gauthier-garnier": {
      "name": "Gauthier Garnier"
    },
    "gilzow": {
      "name": "Paul Gilzow"
    },
    "gmoigneu": {
      "name": "Guillaume Moigneu",
      "github": "gmoigneu",
      "linkedin": "https://www.linkedin.com/in/guillaumemoigneu/"
    },
    "gregqualls": {
      "name": "Greg Qualls"
    },
    "guguss": {
      "name": "Augustin Delaporte"
    },
    "haylee-millar": {
      "name": "Haylee Millar"
    },
    "ivana-kotur": {
      "name": "Ivana Kotur"
    },
    "jackrabbithanna": {
      "name": "Mark Hanna"
    },
    "jared-wright": {
      "name": "Jared Wright",
      "github": "jww-sh",
      "linkedin": "https://www.linkedin.com/in/jaredwaynewright"
    },
    "jessica-orozco": {
      "name": "Jessica Orozco"
    },
    "joey-stanford": {
      "name": "Joey Stanford"
    },
    "john-grubb": {
      "name": "John Grubb"
    },
    "jonas-kruger": {
      "name": "Jonas Kruger"
    },
    "kathryn-frazer": {
      "name": "Kathryn Frazer"
    },
    "kemiojo": {
      "name": "Kemi Elizabeth Ojogbede"
    },
    "kieronsambrook-smith": {
      "name": "Kieronsambrook Smith"
    },
    "laurent-arnoud": {
      "name": "Laurent Arnoud"
    },
    "letoya-boyne": {
      "name": "Letoya Boyne"
    },
    "lolautruche": {
      "name": "Jérôme Vieilledent"
    },
    "lyly-lepinay": {
      "name": "Lyly Lepinay"
    },
    "manauwar-alam": {
      "name": "Manauwar Alam"
    },
    "marc-antoine-porri": {
      "name": "Marc Antoine Porri"
    },
    "maria-antinkaapo": {
      "name": "Maria Antinkaapo"
    },
    "maria-de-anton": {
      "name": "Maria De Anton"
    },
    "mark-dorison": {
      "name": "Mark Dorison"
    },
    "markus-hausammann": {
      "name": "Markus Hausammann"
    },
    "mary-thomas": {
      "name": "Mary Thomas"
    },
    "mathias-bolt-lesniak": {
      "name": "Mathias Bolt Lesniak"
    },
    "mathieu-strauch": {
      "name": "Mathieu Strauch"
    },
    "matthias-van-woensel": {
      "name": "Matthias Van Woensel",
      "linkedin": "https://www.linkedin.com/in/matthias-van-woensel-267a069"
    },
    "michael-sharp": {
      "name": "Michael Sharp"
    },
    "mupsi": {
      "name": "Marine Gandy"
    },
    "natalie-harper": {
      "name": "Natalie Harper"
    },
    "ngommenginger": {
      "name": "Nicolas Gommenginger",
      "linkedin": "https://www.linkedin.com/in/nicolas-gommenginger"
    },
    "nicholas-bennison": {
      "name": "Nicholas Bennison"
    },
    "nicholas-vahalik": {
      "name": "Nicholas Vahalik"
    },
    "nick-hardiman": {
      "name": "Nick Hardiman"
    },
    "nickanderegg": {
      "name": "Nickanderegg"
    },
    "nicolas-grekas": {
      "name": "Nicolas Grekas",
      "github": "nicolas-grekas",
      "linkedin": "https://www.linkedin.com/in/nicolasgrekas/"
    },
    "niti-malwade": {
      "name": "Niti Malwade"
    },
    "opensocialteam": {
      "name": "Opensocialteam"
    },
    "ori-pekelman": {
      "name": "Ori Pekelman"
    },
    "otavio-santana": {
      "name": "Otavio Santana"
    },
    "palwandi": {
      "name": "Pawan Alwandi",
      "github": "pawpy",
      "linkedin": "https://www.linkedin.com/in/pawanalwandi"
    },
    "patrick-boest": {
      "name": "Patrick Boest"
    },
    "patrick-dawkins": {
      "name": "Patrick Dawkins",
      "github": "pjcdawkins",
      "linkedin": "https://www.linkedin.com/in/patrickdawkins"
    },
    "patrick-klima": {
      "name": "Patrick Klima"
    },
    "pjcdawkins": {
      "name": "Pjcdawkins"
    },
    "prineet-kaurbhurji": {
      "name": "Prineet Kaurbhurji"
    },
    "quentin-sinig": {
      "name": "Quentin Sinig"
    },
    "ralt": {
      "name": "Florian Margaine",
      "github": "ralt",
      "linkedin": "https://www.linkedin.com/in/florian-margaine-43971136"
    },
    "ramanathanramakrishnamurthy": {
      "name": "Ramanathanramakrishnamurthy"
    },
    "remi-lejeune": {
      "name": "Rémi Lejeune"
    },
    "ribel": {
      "name": "Taras Kruts"
    },
    "robert-douglass": {
      "name": "Robert Douglass"
    },
    "rudy-weber": {
      "name": "Rudy Weber"
    },
    "ryan-hicks": {
      "name": "Ryan Hicks"
    },
    "sabri-helal": {
      "name": "Sabri Helal"
    },
    "savannah-bergeron": {
      "name": "Savannah Bergeron"
    },
    "shannon-vettes": {
      "name": "Shannon Vettes"
    },
    "shawn-ogasawara": {
      "name": "Shawn Ogasawara",
      "linkedin": "https://www.linkedin.com/in/shawn-ogasawara-83a9a0/"
    },
    "shawna-spoor": {
      "name": "Shawna Spoor"
    },
    "shedrack-akintayo": {
      "name": "Shedrack Akintayo"
    },
    "simon-ruggier": {
      "name": "Simon Ruggier"
    },
    "sophie-van-der-kindere": {
      "name": "Sophie Van Der Kindere"
    },
    "stefanos-thampis": {
      "name": "Stefanos Thampis"
    },
    "stephen-weinberg": {
      "name": "Stephen Weinberg"
    },
    "sukhman-virk": {
      "name": "Sukhman Virk"
    },
    "sumaira-nazir": {
      "name": "Sumaira Nazir"
    },
    "sumer": {
      "name": "Sümer Cip"
    },
    "syed-raza": {
      "name": "Syed Raza"
    },
    "tamara-bacchia": {
      "name": "Tamara Bacchia"
    },
    "tara-arnold": {
      "name": "Tara Arnold"
    },
    "theosakamg": {
      "name": "Mickael Gaillard",
      "github": "theosakamg"
    },
    "thomasdiluccio": {
      "name": "Thomas di Luccio"
    },
    "tim-anderson": {
      "name": "Tim Anderson"
    },
    "tom-helmer-hansen": {
      "name": "Tom Helmer Hansen"
    },
    "tylermills": {
      "name": "Tyler Mills"
    },
    "upsun": {
      "name": "Upsun"
    },
    "veronika-tolkachova": {
      "name": "Veronika Tolkachova",
      "linkedin": "https://www.linkedin.com/in/veronika-tolkachova-169167a2"
    },
    "vince-parker": {
      "name": "Vince Parker"
    },
    "vinnie-russo": {
      "name": "Vincenzo Russo"
    },
    "vrobert78": {
      "name": "Vincent Robert",
      "github": "vrobert78",
      "linkedin": "https://www.linkedin.com/in/vincent-robert-498a883"
    },
    "yuriy-babenko": {
      "name": "Yuriy Babenko"
    },
    "yuriy-gerasimov": {
      "name": "Yuriy Gerasimov"
    }
  };
  return <div className="post-meta">
      {(authors.length > 0 || formattedDate) && <div className="post-meta-info">
          {authors.length > 0 && <div className="post-meta-authors">
              {authors.map(slug => {
    const {name, url, avatarUrl} = resolveAuthor(slug);
    const inner = <>
                    {avatarUrl && <img src={avatarUrl} alt={name} className="post-meta-avatar" />}
                    <span className="post-meta-author-name">{name}</span>
                  </>;
    return url ? <a key={slug} href={url} target="_blank" rel="noopener noreferrer" className="post-meta-author">
                    {inner}
                  </a> : <span key={slug} className="post-meta-author">{inner}</span>;
  })}
            </div>}
          {authors.length > 0 && formattedDate && <span className="post-meta-separator" aria-hidden="true">·</span>}
          {formattedDate && <span className="post-meta-date">{formattedDate}</span>}
        </div>}
      {image && <img src={image} alt="" className="post-meta-image" aria-hidden="true" />}
    </div>;
};

<PostMeta data={{ author: ["flovntp"], date: "2025-02-12T18:30:05-04:00", image: "/images/posts/how-tos/install-pandoc-on-upsun/pandoc.webp" }} />

With the recent growth in enthusiasm for AI assistants, you may be looking for how these assistants can provide technical information about your product.
After years of remarkable achievements through web 2.0 and its famous `robots.txt`, `security.txt`, and `humans.txt`,
a new standard has been proposed to the web ecosystem and will soon become essential for the web: [`llms.txt`](https://llmstxt.org/).
`llms.txt` was conceived by [Jeremy Howard](https://github.com/jph00), co-founder of [Answer.AI](https://answerai.pro/), to address a fundamental challenge in AI-human interaction.

When AI assistants attempt to process standard web pages, they struggle with non-essential elements like navigation menus, scripts, and styling.
These elements consume valuable context space without contributing to the actual content understanding.
`llms.txt` provides an elegant solution: it delivers precisely curated information in a format that AI systems can efficiently process and understand.

If you need to convert files from one markup format to another, [Pandoc](https://pandoc.org/) is your swiss-army knife.
Developed by [John MacFarlane](https://github.com/jgm), Pandoc is a [Haskell](https://www.haskell.org/) library for converting from one markup format to another
and John provides in [this `pandoc` repo](https://github.com/jgm/pandoc) a command-line tool that uses this Pandoc library.
Easy to install and ready to convert.

In this How-to guide, we will see how to install this `pandoc` command line tool on your Upsun project.

<Note>
  **Assumptions:**

  * You already have an Upsun account. If you don’t, [please register for a trial account](https://auth.upsun.com/register). You can sign up with an email address or an existing GitHub, Bitbucket, or Google account. If you choose one of these accounts, you can set a password for your Upsun account later.
  * You have the [Upsun CLI](https://docs.upsun.com/administration/cli.html) installed locally.
  * You have the [Git CLI](https://github.com/git-guides/install-git) installed locally.
</Note>

For this tutorial, we will start with a basic HTML application.
The main goal of this tutorial is to showcase how to install `pandoc` on your project and quickly generate a `llms.txt` file from your HTML pages.

## Prepare your local HTML project

In order to quickly showcase the strength of Pandoc, we will simulate a simple HTML application that could be obtained using a static website generator like [Hugo](https://hugo.io/).
The proposed structure will be:

```
.upsun/
    config.yaml
  public/
    learn/
      api.html
      applications.html
    index.html
```

To do so, in your Terminal, execute the following commands:

```shell {filename="Terminal"} theme={null}
mkdir my-html-app
cd my-html-app
mkdir public
curl -L https://raw.githubusercontent.com/upsun/snippets/refs/heads/main/src/llms/html-app-example.tar.gz | tar -xvz - -C public
git init && git add . && git commit -m "init HTML app"
```

<Note>
  🚨 **Please note**: This `html-app-example.tar.gz` file contains all HTML files (`index.html`, `./learn/*.html`) in this [`llms` folder](https://github.com/upsun/snippets/tree/main/src/llms).
</Note>

## Give Pandoc a try

To showcase the power of [Pandoc](https://pandoc.org/), let's give it a try locally and convert our HTML to an `llms.txt` file.

### Install Pandoc locally

To install Pandoc locally, please follow the official [Installation Guide](https://pandoc.org/installing.html).

### Use Pandoc for HTML to Markdown conversion

You should now have access to `pandoc` tool and we will use it to generate a `public/llms-test.txt` file that will concatenate all the HTML pages of the project in Markdown.
Let's execute this command line that will look for all HTML files in the `public` folder and concat them in a single file `./public/llms-test.txt`:

```bash {filename="Terminal"} theme={null}
pandoc $(find ./public -iname "*.html" -type f | sort -d) -f html -s -o "./public/llms-test.txt" -t markdown
open public/llms-test.txt
```

Now that you can see the power of Pandoc, feel free to check the [Official Manual](https://pandoc.org/MANUAL.html) for more advanced usage.

## Use Pandoc in your Upsun project

Generating this `llms.txt` file locally and pushing it in your source code is not convenient.
We would like this generation to be dynamic, each time you update your website content.

### Init your Upsun config

Upsun CLI provides a command to initialize a basic config for your local code.
As it is a simple HTML app, we will generate a minimum configuration file using the following command:

```shell {filename="Terminal"} theme={null}
➜  my-html-app git:(main) upsun project:init
Welcome to Upsun!
Let's get started with a few questions.

We need to know a bit more about your project. This will only take a minute!

What language is your project using? We support the following: [JavaScript/Node.js]

Tell us your project's application name: [app]

                       (\_/)
We’re almost done...  =(^.^)=

Last but not least, unless you’re creating a static website, your project uses services. Let’s define them:

Select all the services you are using: []

You have not selected any service, would you like to proceed anyway? [Yes]

┌───────────────────────────────────────────────────┐
│   CONGRATULATIONS!                                │
│                                                   │
│   We have created the following files for your:   │
│     - .environment                                │
│     - .upsun/config.yaml                          │
│                                                   │
│   We’re jumping for joy! ⍢                        │
└───────────────────────────────────────────────────┘
         │ /
         │/
         │
  (\ /)
  ( . .)
  o (_(“)(“)
```

Please select

* `Javascript/Node.js`
* application name: `app`
* no service selected

Your HTML application is almost ready to be deployed on Upsun, one more step to go.

Update this config line into the newly created `.upsun/config.yaml` file for the router to point to your `public` folder:

```yaml {filename=".upsun/config.yaml",linenos=table,hl_lines=["6-7"],linenostart=1} theme={null}
applications:
  app:
    web:
      locations:
        "/":
          root: "public"
          index: ["index.html"]
          passthru: true
```

and then commit your updates:

```shell {filename="Terminal"} theme={null}
git add .upsun/config.yaml && git commit -m "change locations.root to the public folder"
```

### Create an Upsun project

You then need to create an Upsun project by executing these commands and follow the prompts:

```shell {filename="Terminal"} theme={null}
upsun project:create
upsun push
```

### Install Pandoc

There is to ways to install `pandoc` on your project:

* [using a shell script](#using-a-shell-script)
* [using Composable Image](#using-composable-image)

#### Using a shell script

[John MacFarlane](https://github.com/jgm) provides in his [Pandoc repo](https://github.com/jgm/pandoc) a quick and easy way to install Pandoc.

We've prepared a shell script for you ([source](https://github.com/upsun/snippets/blob/main/src/install-github-asset.sh)) that can be used to install the latest version of Pandoc.
Update your `.upsun/config.yaml` file and add this curl call in your `applications.app.hooks.build` step:

```yaml {filename=".upsun/config.yaml",linenos=table,hl_lines=["9-10"],linenostart=1} theme={null}
applications:
  app:
    type: "nodejs:20"
    #...
    hooks:
      build: |
        set -x -e
        #...
        curl -fsS https://raw.githubusercontent.com/upsun/snippets/refs/heads/main/src/install-github-asset.sh | bash /dev/stdin "jgm/pandoc" 
        pandoc -v
```

The [`install-pandoc.sh`](https://github.com/upsun/snippets/blob/main/src/install-github-asset.sh) script installs the `pandoc` binary from [Pandoc repo](https://github.com/jgm/pandoc) in the `/app/.global/bin` folder of your application container.

#### Using Composable image

The Upsun [Composable image](https://docs.upsun.com/create-apps/app-reference.html#composable-image-beta) provides enhanced flexibility when defining your app.
It allows you to install several runtimes and tools in your application container, in a “one image to rule them all” approach.

The composable image is built on [Nix](https://nix.dev/) and the good is [Pandoc package](https://search.nixos.org/packages?channel=24.11\&from=0\&size=50\&sort=relevance\&type=packages\&query=pandoc) is available.

Update your `.upsun/config.yaml` by commenting default `type` parameter and by adding the following lines:

```yaml {filename=".upsun/config.yaml",linenos=table,hl_lines=["3-5", "10"],linenostart=1} theme={null}
applications:
  app:
    #type: "nodejs:20"
    stack: 
      - pandoc
    #...
    hooks:
      build: |
        set -x -e
        pandoc -v
```

And then, deploy your updates:

```shell {filename="Terminal"} theme={null}
git add .upsun/config.yaml .environment && git commit -m "install Pandoc"
upsun push
```

### Use Pandoc dynamically

You can now use `pandoc` in your project to dynamically generate a `public/llms.txt` file that will concatenate all the HTML pages in Markdown, as [tested locally before](#give-pandoc-a-try).
Update your `.upsun/config.yaml` by adding the following lines:

```yaml {filename=".upsun/config.yaml",linenos=table,hl_lines=["8"],linenostart=1} theme={null}
applications:
  app:        
    #...
    hooks:
      build: |
        set -x -e
        #...
        pandoc $(find ./public -iname "*.html" -type f | sort -d) -f html -s -o "./public/llms.txt" -t markdown
```

And then, deploy your updates:

```shell {filename="Terminal"} theme={null}
git add .upsun/config.yaml && git commit -m "Use Pandoc to generate a public/llms.txt file"
upsun push
```

Test it works by accessing the file by adding `/llms.txt` to your environment URL:

```shell {filename="Terminal"} theme={null}
upsun env:url --primary
```

## Conclusion

Et voilà, we saw how to use `pandoc` to convert all existing HTML pages into a single Markdown `public/llms.txt` file. Now, perhaps the next step would be to train an AI Assistant with the file `llms.txt`...

Stay tuned.

Discover how to deploy a personal [Chainlit](https://github.com/Chainlit/chainlit) AI assistant on Upsun by reading this great blogpost: [Experiment with Chainlit AI interface with RAG on Upsun](https://devcenter.upsun.com/posts/hands-on/deploying-chainlit-with-rag)
