From aed9d21bb622e3579c3296a73a0240e36d04180d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Louf?=
Date: Mon, 25 Mar 2024 12:08:09 +0100
Subject: [PATCH] Improve the documentation for structured generation

---
 docs/api/guide.md                |  1 +
 docs/reference/cfg.md            | 62 ++++++++++++++++++++++++++
 docs/reference/choices.md        | 18 ++++----
 docs/reference/custom_fsm_ops.md | 74 ++++++--------------------------
 docs/reference/json.md           | 52 +++++++++++++++++++---
 docs/reference/json_mode.md      | 17 ++++++++
 docs/reference/regex.md          | 26 +++++++++++
 docs/reference/samplers.md       |  5 +++
 docs/reference/text.md           | 54 ++++++++++++-----------
 docs/reference/types.md          | 23 +++++++---
 docs/stylesheets/extra.css       | 17 +++++++-
 docs/welcome.md                  |  8 ++--
 mkdocs.yml                       | 31 +++++--------
 13 files changed, 258 insertions(+), 130 deletions(-)
 create mode 100644 docs/api/guide.md
 create mode 100644 docs/reference/json_mode.md

diff --git a/docs/api/guide.md b/docs/api/guide.md
new file mode 100644
index 000000000..1c3160c8c
--- /dev/null
+++ b/docs/api/guide.md
@@ -0,0 +1 @@
+::: outlines.fsm.guide
diff --git a/docs/reference/cfg.md b/docs/reference/cfg.md
index 6877b0542..25cdbcf10 100644
--- a/docs/reference/cfg.md
+++ b/docs/reference/cfg.md
@@ -1 +1,63 @@
 # Grammar-structured generation
+
+You can pass any context-free grammar in the EBNF format and Outlines will generate an output that is valid according to this grammar:
+
+```python
+from outlines import models, generate
+
+arithmetic_grammar = """
+    ?start: expression
+
+    ?expression: term (("+" | "-") term)*
+
+    ?term: factor (("*" | "/") factor)*
+
+    ?factor: NUMBER
+           | "-" factor
+           | "(" expression ")"
+
+    %import common.NUMBER
+"""
+
+model = models.transformers("WizardLM/WizardMath-7B-V1.1")
+generator = generate.cfg(model, arithmetic_grammar)
+sequence = generator(
+    "Alice had 4 apples and Bob ate 2. "
+    + "Write an expression for Alice's apples:"
+)
+
+print(sequence)
+# (8-2)
+```
+
+!!! Note "Performance"
+
+    The implementation of grammar-structured generation in Outlines is very naive. This does not reflect the performance of [.txt](https://dottxt.co)'s product, where we made grammar-structured generation as fast as regex-structured generation.
+
+
+## Ready-to-use grammars
+
+Outlines contains a (small) library of grammars that can be imported and used directly. We can rewrite the previous example as:
+
+```python
+import outlines
+from outlines import models, generate
+
+arithmetic_grammar = outlines.grammars.arithmetic
+
+model = models.transformers("WizardLM/WizardMath-7B-V1.1")
+generator = generate.cfg(model, arithmetic_grammar)
+sequence = generator(
+    "Alice had 4 apples and Bob ate 2. "
+    + "Write an expression for Alice's apples:"
+)
+
+print(sequence)
+# (8-2)
+```
+
+The following grammars are currently available:
+
+- Arithmetic grammar via `outlines.grammars.arithmetic`
+- JSON grammar via `outlines.grammars.json`
+
+If you would like more grammars to be added to the repository, please open an [issue](https://github.com/outlines-dev/outlines/issues) or a [pull request](https://github.com/outlines-dev/outlines/pulls).
diff --git a/docs/reference/choices.md b/docs/reference/choices.md
index 8e38f4a1a..275b4a9a0 100644
--- a/docs/reference/choices.md
+++ b/docs/reference/choices.md
@@ -1,14 +1,16 @@
 # Multiple choices
 
-Choice between different options
-In some cases we know the output is to be chosen between different options.
We can restrict the completion’s output to these choices using the is_in keyword argument:
+Outlines allows you to make sure the generated text is chosen between different options:
 
 ```python
-import outlines.models as models
+from outlines import models, generate
+
+model = models.transformers("mistralai/Mistral-7B-v0.1")
+generator = generate.choice(model, ["skirt", "dress", "pen", "jacket"])
+answer = generator("Pick the odd word out: skirt, dress, pen, jacket")
 
-complete = models.openai("gpt-3.5-turbo")
-answer = complete(
-    "Pick the odd word out: skirt, dress, pen, jacket",
-    is_in=["skirt", "dress", "pen", "jacket"]
-)
 ```
+
+!!! Note "Performance"
+
+    `generate.choice` computes an index that helps Outlines guide generation. This can take some time, but only needs to be done once. If you want to generate from the same list of choices several times, make sure that you only call `generate.choice` once.
diff --git a/docs/reference/custom_fsm_ops.md b/docs/reference/custom_fsm_ops.md
index 87b5bab35..22f5ca6bb 100644
--- a/docs/reference/custom_fsm_ops.md
+++ b/docs/reference/custom_fsm_ops.md
@@ -1,83 +1,37 @@
 # Custom FSM Operations
 
-```RegexFSM.from_interegular_fsm``` leverages the flexibility of ```interegular.FSM``` to use the available operations in ```interegular```.
+Outlines is fast because it compiles regular expressions into an index ahead of inference. To do so we use the equivalence between regular expressions and Finite State Machines (FSMs), and the library [interegular](https://github.com/MegaIng/interegular) to perform the translation.
 
-## Examples
+Alternatively, one can pass an FSM built using `interegular` directly to structure the generation.
 
-### ```difference```
+## Example
 
-Returns an FSM which recognises only the strings recognised by the first FSM in the list, but none of the others.
+### Using the `difference` operation
+
+In the following example we build an FSM which recognizes only the strings that match the first regular expression but not the second. In particular, it will prevent the words "pink" and "elephant" from being generated:
 
 ```python
+import interegular
+from outlines import models, generate
+
+
 list_of_strings_pattern = """\["[^"\s]*"(?:,"[^"\s]*")*\]"""
 pink_elephant_pattern = """.*(pink|elephant).*"""
 
 list_of_strings_fsm = interegular.parse_pattern(list_of_strings_pattern).to_fsm()
 pink_elephant_fsm = interegular.parse_pattern(pink_elephant_pattern).to_fsm()
 
-list_of_strings_fsm.accepts('["a","pink","elephant"]')
-# True
-
 difference_fsm = list_of_strings_fsm - pink_elephant_fsm
 
-difference_fsm_fsm.accepts('["a","pink","elephant"]')
+difference_fsm.accepts('["a","pink","elephant"]')
 # False
-difference_fsm_fsm.accepts('["a","blue","donkey"]')
+difference_fsm.accepts('["a","blue","donkey"]')
 # True
-```
-
-### ```union```
-
-Returns a finite state machine which accepts any sequence of symbols that is accepted by either self or other.
-
-```python
-list_of_strings_pattern = """\["[^"\s]*"(?:,"[^"\s]*")*\]"""
-tuple_of_strings_pattern = """\("[^"\s]*"(?:,"[^"\s]*")*\)"""
-
-list_of_strings_fsm = interegular.parse_pattern(list_of_strings_pattern).to_fsm()
-tuple_of_strings_fsm = interegular.parse_pattern(tuple_of_strings_pattern).to_fsm()
-
-list_of_strings_fsm.accepts('("a","pink","elephant")')
-# False
-union_fsm = list_of_strings_fsm|tuple_of_strings_fsm
-union_fsm.accepts('["a","pink","elephant"]')
-# True
-union_fsm.accepts('("a","blue","donkey")')
-# True
+
+model = models.transformers("mistralai/Mistral-7B-Instruct-v0.2")
+generator = generate.fsm(model, difference_fsm)
+response = generator("Don't talk about pink elephants")
 ```
 
-### ```intersection```
-
-Returns an FSM which accepts any sequence of symbols that is accepted by both of the original FSMs.
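The `difference` example above can be sanity-checked without interegular: for a finished string, "accepted by the first FSM but not the second" is just a pair of full-match tests. A standalone sketch with Python's standard `re` module (illustration only — Outlines and interegular work on the FSMs themselves, token by token, not on completed strings):

```python
import re

list_of_strings_pattern = r'\["[^"\s]*"(?:,"[^"\s]*")*\]'
pink_elephant_pattern = r'.*(pink|elephant).*'

def in_difference(s: str) -> bool:
    # In the language of the difference FSM: matched by the first
    # pattern and rejected by the second
    return (
        re.fullmatch(list_of_strings_pattern, s) is not None
        and re.fullmatch(pink_elephant_pattern, s) is None
    )

print(in_difference('["a","pink","elephant"]'))  # False
print(in_difference('["a","blue","donkey"]'))    # True
```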
- -```python -list_of_strings_pattern = """\["[^"\s]*"(?:,"[^"\s]*")*\]""" -pink_elephant_pattern = """.*(pink|elephant).*""" - -list_of_strings_fsm = interegular.parse_pattern(list_of_strings_pattern).to_fsm() -pink_elephant_fsm = interegular.parse_pattern(pink_elephant_pattern).to_fsm() - -list_of_strings_fsm.accepts('["a","blue","donkey"]') -# True - -intersection_fsm = list_of_strings_fsm & pink_elephant_fsm - -intersection_fsm.accepts('["a","pink","elephant"]') -# True -intersection_fsm.accepts('["a","blue","donkey"]') -# False -``` - -_There are more operations available, we refer to https://github.com/MegaIng/interegular/blob/master/interegular/fsm.py._ - -# Loading Custom FSM - -```python -import outlines - -generator = outlines.generate.fsm(model, custom_fsm) - -response = generator(prompt) -``` +To see the other operations available, consult [interegular's documentation](https://github.com/MegaIng/interegular/blob/master/interegular/fsm.py). diff --git a/docs/reference/json.md b/docs/reference/json.md index 84115af88..b13e707e1 100644 --- a/docs/reference/json.md +++ b/docs/reference/json.md @@ -1,4 +1,4 @@ -# Make the LLM follow a JSON Schema +# JSON structured generation Outlines can make any open source model return a JSON object that follows a structure that is specified by the user. This is useful whenever we want the output of the model to be processed by code downstream: code does not understand natural language but rather the structured language it has been programmed to understand. @@ -16,8 +16,7 @@ Outlines can infer the structure of the output from a Pydantic model. 
The result
 
 ```python
 from pydantic import BaseModel
-from outlines import models
-from outlines import text
+from outlines import models, generate
 
 
 class User(BaseModel):
@@ -27,20 +26,59 @@
 
 model = models.transformers("mistralai/Mistral-7B-v0.1")
 
-generator = text.generate.json(model, User)
-result = generator("Create a user profile with the fields name, last_name and id")
+generator = generate.json(model, User)
+result = generator(
+    "Create a user profile with the fields name, last_name and id"
+)
 print(result)
 # User(name="John", last_name="Doe", id=11)
 ```
 
-!!! warning "JSON and whitespaces"
+!!! Note "JSON and whitespaces"
 
-    By default Outlines lets model choose the number of linebreaks and white spaces used to structure the JSON. Small models tend to struggle with this, in which case we recommend to set the value of the parameter `whitespace_pattern` to the empty string:
+    By default Outlines lets the model choose the number of linebreaks and white spaces used to structure the JSON. Small models tend to struggle with this, in which case we recommend setting the value of the parameter `whitespace_pattern` to the empty string:
 
     ```python
-    generator = text.generate.json(model, User, whitespace_pattern="")
+    generator = generate.json(model, User, whitespace_pattern="")
     ```
 
+!!! Note "Performance"
+
+    `generate.json` computes an index that helps Outlines guide generation. This can take some time, but only needs to be done once. If you want to generate several times with the same schema, make sure that you only call `generate.json` once.
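One way to picture what `whitespace_pattern=""` asks of the model: the only JSON layout left is the fully compact one, i.e. the same shape the standard library produces with `json.dumps(..., separators=(",", ":"))`. A standalone sketch, independent of Outlines:

```python
import json

user = {"name": "John", "last_name": "Doe", "id": 11}

# separators=(",", ":") drops every space and linebreak — the one
# layout that remains when whitespace_pattern is the empty string
compact = json.dumps(user, separators=(",", ":"))
print(compact)
# {"name":"John","last_name":"Doe","id":11}
```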
+
+
+## Using a JSON Schema
+
+Instead of a Pydantic model you can pass a string that represents a [JSON Schema](https://json-schema.org/) specification to `generate.json`:
+
+```python
+from outlines import models, generate
+
+model = models.transformers("mistralai/Mistral-7B-v0.1")
+
+schema = """
+{
+  "title": "User",
+  "type": "object",
+  "properties": {
+    "name": {"type": "string"},
+    "last_name": {"type": "string"},
+    "id": {"type": "integer"}
+  }
+}
+"""
+
+generator = generate.json(model, schema)
+result = generator(
+    "Create a user profile with the fields name, last_name and id"
+)
+print(result)
+# {'name': 'John', 'last_name': 'Doe', 'id': 11}
+```
+
 ## From a function's signature
 
 Outlines can infer the structure of the output from the signature of a function. The result is a dictionary, and can be passed directly to the function using the usual dictionary expansion syntax `**`:
diff --git a/docs/reference/json_mode.md b/docs/reference/json_mode.md
new file mode 100644
index 000000000..bb1c1c7a8
--- /dev/null
+++ b/docs/reference/json_mode.md
@@ -0,0 +1,17 @@
+# JSON mode
+
+Outlines can guarantee that the LLM will generate valid JSON, using [Grammar-structured generation](cfg.md):
+
+```python
+import outlines
+from outlines import models, generate
+
+json_grammar = outlines.grammars.json
+
+model = models.transformers("mistralai/Mistral-7B-v0.1")
+generator = generate.cfg(model, json_grammar)
+sequence = generator("Generate valid JSON")
+```
+
+!!! Note "JSON that follows a schema"
+
+    If you want to guarantee that the generated JSON follows a given schema, consult [this section](json.md) instead.
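Since JSON mode guarantees syntactically valid output, the generated sequence can always be handed straight to the standard `json` parser. A sketch — the `sequence` string below is a made-up stand-in for an actual model output:

```python
import json

# Hypothetical model output; JSON mode guarantees it parses
sequence = '{"message": "valid JSON", "ok": true}'

data = json.loads(sequence)
print(data["ok"])
# True
```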
diff --git a/docs/reference/regex.md b/docs/reference/regex.md
index 1bdb8afe0..093dd0e18 100644
--- a/docs/reference/regex.md
+++ b/docs/reference/regex.md
@@ -1 +1,27 @@
 # Regular expressions
+
+Outlines can guarantee that the text generated by the LLM will match a regular expression:
+
+```python
+from outlines import models, generate
+
+model = models.transformers("mistralai/Mistral-7B-Instruct-v0.2")
+
+generator = generate.regex(
+    model,
+    r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)",
+)
+
+prompt = "What is the IP address of the Google DNS servers? "
+answer = generator(prompt, max_tokens=30)
+
+print(answer)
+# What is the IP address of the Google DNS servers?
+# 2.2.6.1
+```
+
+If you find yourself using `generate.regex` to restrict the answers' type, you can take a look at [type-structured generation](types.md) instead.
+
+!!! Note "Performance"
+
+    `generate.regex` computes an index that helps Outlines guide generation. This can take some time, but only needs to be done once. If you want to generate several times using the same regular expression, make sure that you only call `generate.regex` once.
diff --git a/docs/reference/samplers.md b/docs/reference/samplers.md
index ec0729d63..a0f652bbf 100644
--- a/docs/reference/samplers.md
+++ b/docs/reference/samplers.md
@@ -108,3 +108,8 @@ answer = generator("What is 2+2?")
 print(answer)
 # 4
 ```
+
+
+!!! Warning "Compatibility"
+
+    Only models from the `transformers` and `exllamav2` libraries are compatible with Beam Search.
diff --git a/docs/reference/text.md b/docs/reference/text.md
index adf096411..f364c3d2e 100644
--- a/docs/reference/text.md
+++ b/docs/reference/text.md
@@ -1,6 +1,10 @@
 # Text generation
 
-Outlines provides a unified interface to generate text with many language models, API-based and local:
+Outlines provides a unified interface to generate text with many language models, API-based and local. The same pattern is used throughout the library:
+
+1.
Instantiate a generator by calling `outlines.generate.text` with the model to be used. +2. Call the generator with the prompt and (optionally) some generation parameters. + ```python from outlines import models, generate @@ -14,15 +18,12 @@ generator = generate.text(model) answer = generator("What is 2+2?") ``` -We generate text in two steps: - -1. Instantiate a generator with the model you want to use -2. Call the generator with the prompt +By default Outlines uses the multinomial sampler with `temperature=1`. See [this section](samplers.md) to learn how to use different samplers. +## Streaming -## Limit the number of tokens generated +Outlines allows you to stream the model's response by calling the `.stream` method of the generator with the prompt: -To limit the number of tokens generated you can pass the `max_tokens` positional argument to the generator: ```python from outlines import models, generate @@ -30,14 +31,16 @@ from outlines import models, generate model = models.transformers("mistralai/Mistral-7B-v0.1") generator = generate.text(model) -answer = generator("What is 2+2?", 5) -answer = generator("What is 2+2?", max_tokens=5) +tokens = generator.stream("What is 2+2?") +for token in tokens: + print(token) ``` -## Stop when a given string is found +## Parameters -You can also ask the model to stop generating text after a given string has been generated, for instance a period or a line break. 
You can pass a string or a line of string for the `stop_at` argument:
+### Limit the number of tokens generated
 
+To limit the number of tokens generated you can pass the `max_tokens` positional argument to the generator:
 
 ```python
@@ -45,13 +48,13 @@ from outlines import models, generate
 model = models.transformers("mistralai/Mistral-7B-v0.1")
 generator = generate.text(model)
 
-answer = generator("What is 2+2?", stop_at=".")
-answer = generator("What is 2+2?", stop_at=[".", "\n"])
+answer = generator("What is 2+2?", 5)
+answer = generator("What is 2+2?", max_tokens=5)
 ```
 
-## Streaming
+### Stop after a given string is generated
 
-Outlines allows you to stream the model's response by calling the `.stream` method of the generator with the prompt:
+You can also ask the model to stop generating text after a given string has been generated, for instance a period or a line break. You can pass a string or a list of strings for the `stop_at` argument:
 
 
 ```python
@@ -60,22 +63,25 @@ from outlines import models, generate
 model = models.transformers("mistralai/Mistral-7B-v0.1")
 generator = generate.text(model)
 
-tokens = generator.stream("What is 2+2?")
-for token in tokens:
-    print(token)
+answer = generator("What is 2+2?", stop_at=".")
+answer = generator("What is 2+2?", stop_at=[".", "\n"])
 ```
 
+*The stopping string will be included in the response.*
+
-## Use a different sampler
+### Seed the generation
 
-Outlines uses the multinomial sampler by default.
To specify another sampler, for instance the greedy sampler you need to specify it when instantiating the generator:
+It can be useful to seed the generation in order to get reproducible results:
 
 ```python
+import torch
 from outlines import models, generate
-from outlines.generate.samplers import greedy
-
 model = models.transformers("mistralai/Mistral-7B-v0.1")
-generator = generate.text(model, sampler=greedy)
-tokens = generator("What is 2+2?")
+generator = generate.text(model)
+
+rng = torch.Generator(device="cuda")
+rng.manual_seed(789001)
+
+answer = generator("What is 2+2?", rng=rng)
 ```
diff --git a/docs/reference/types.md b/docs/reference/types.md
index a10013e45..a82d521ae 100644
--- a/docs/reference/types.md
+++ b/docs/reference/types.md
@@ -1,13 +1,22 @@
 # Type constraints
 
-We can ask completions to be restricted to valid integers or floating-point numbers using the `type` keyword argument, respectively with the “int” or “float” value:
+We can ask completions to be restricted to valid Python types:
 
 ```python
-import outlines.models as models
+from outlines import models, generate
 
-complete = models.openai("gpt-3.5-turbo")
-answer = complete(
-    "When I was 6 my sister was half my age. Now I’m 70 how old is my sister?",
-    type="int"
-)
+model = models.transformers("mistralai/Mistral-7B-v0.1")
+generator = generate.format(model, int)
+answer = generator("When I was 6 my sister was half my age. Now I’m 70 how old is my sister?")
+print(answer)
+# 67
 ```
+
+The following types are currently available:
+
+- `int`
+- `float`
+- `bool`
+- `datetime.date`
+- `datetime.time`
+- `datetime.datetime`
diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css
index 6c6e62219..d2cae9007 100644
--- a/docs/stylesheets/extra.css
+++ b/docs/stylesheets/extra.css
@@ -9,10 +9,12 @@
     --md-code-fg-color: #FFFFFF;
     --md-text-font-family: "Inter";
     --md-code-font: "Source Code Pro Custom";
+    --md-default-fg-color--light: #D8DEE9;
+    --md-default-fg-color--lighter: #E5E9F0;
+    --md-default-fg-color--lightest: #ECEFF4;
 }
 
 .index-pre-code {
-    margin: 50px;
     max-width: 700px;
     left: 50%;
 }
@@ -22,6 +24,7 @@
 }
 
 .md-typeset pre>code {
+    padding: 1rem;
     border-radius: 1rem;
     box-shadow: 10px 5px 5px #D8DEE9;
 }
@@ -30,6 +33,10 @@
     font-weight: 500;
 }
 
+.md-typeset pre {
+    margin: 2rem;
+}
+
 .language-python {
     background: #FFFFFF ! important
 }
@@ -65,3 +72,11 @@ span.md-ellipsis {
 .md-nav__link--active {
     background-color: #ECEFF4;
 }
+
+.md-typeset ol li {
+    margin-bottom: .1rem;
+}
+
+.md-typeset ul li {
+    margin-bottom: .1rem;
+}
diff --git a/docs/welcome.md b/docs/welcome.md
index ca619e56c..736bec5d0 100644
--- a/docs/welcome.md
+++ b/docs/welcome.md
@@ -6,7 +6,7 @@ Outlines〰 is a Python library that allows you to use Large Language Model in a
 
 ## What models do you support?
 
-We support Openai, but the true power of Outlines〰 is unleashed with Open Source models available via the Transformers, llama.cpp, exllama2 and mamba_ssm libraries. If you want to build and maintain an integration with another library, [get in touch][discord].
+We support [OpenAI](reference/models/openai.md), but the true power of Outlines〰 is unleashed with Open Source models available via the [transformers](reference/models/transformers.md), [llama.cpp](reference/models/llamacpp.md), [exllamav2](reference/models/exllamav2.md) and [mamba_ssm](reference/models/mamba.md) libraries.
If you want to build and maintain an integration with another library, [get in touch][discord].
 
 ## What are the main features?
 
@@ -47,6 +47,7 @@ We support Openai, but the true power of Outlines〰 is unleashed with Open Sour
 
 ## Why Outlines over alternatives?
 
+
 Outlines〰 is built at [.txt][.txt] by engineers with decades of experience in software engineering, machine learning (Bayesian Statistics and NLP), and compilers. [.txt][.txt] is a VC-backed company fully focused on the topic of structured generation and is committed to make the community benefit from its experience.
 
 We are also open source veterans and have authored/maintained many libraries over the years: the [Aesara][aesara]{:target="_blank"} and [Pythological][pythological]{:target="_blank"} ecosystems, [Blackjax][blackjax]{:target="_blank"} and [Hy][hy]{:target="_blank"} among many others.
@@ -54,6 +55,7 @@ We are also open source veterans and have authored/maintained many libraries ove
 
 Outlines does not use unnecessary abstractions that tend to get in your way. We have a laser focus on reliable text generation with LLMs, a clear roadmap to push the state of the art in this area and a commitment to clean and robust code.
 
+And last but not least, unlike alternatives, Outlines' structured generation introduces **no overhead** during inference.
 
 ## Philosophy
 
 more flexible replacement for the `generate` method in the
 [transformers](https://github.com/huggingface/transformers) library.
 
 **Outlines** 〰 helps developers *structure text generation* to build robust
-interfaces with external systems. Provides generation methods that
+interfaces with external systems. It provides generation methods that
-guarantee that the output will match a regular expressions, or follow
+guarantee that the output will match a regular expression, or follow
 a JSON schema.
 
 and generation can be interleaved with control flow, conditionals, custom
 Python functions and calls to other libraries.
 
 **Outlines** 〰 is *compatible with every auto-regressive model*.
It only interfaces with models -via the next-token logits. +via the next-token logits distribution. ## Acknowledgements diff --git a/mkdocs.yml b/mkdocs.yml index 7710e88e9..75de2f824 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -45,20 +45,6 @@ extra: analytics: provider: google property: !ENV GOOGLE_ANALYTICS_KEY - feedback: - title: Was this page helpful? - ratings: - - icon: material/thumb-up-outline - name: This page was helpful - data: 1 - note: >- - Thanks for your feedback! - - icon: material/thumb-down-outline - name: This page could be improved - data: 0 - note: >- - Thanks for your feedback! Help us improve this page by - using our feedback form. # Extensions markdown_extensions: @@ -115,14 +101,16 @@ nav: - Playing chess: cookbook/models_playing_chess.md - Docs: - reference/index.md - - Text generation: - - Free text: reference/text.md + - Generation: + - Text: reference/text.md + - Samplers: reference/samplers.md + - Structured generation: - Classification: reference/choices.md - - JSON: reference/json.md - - Grammar: reference/cfg.md - Regex: reference/regex.md - - Types: reference/types.md - - Samplers: reference/samplers.md + - Type constraints: reference/types.md + - JSON (function calling): reference/json.md + - JSON mode: reference/json_mode.md + - Grammar: reference/cfg.md - Custom FSM operations: reference/custom_fsm_ops.md - Utilities: - Serve with vLLM: reference/vllm.md @@ -131,6 +119,9 @@ nav: - Models: - OpenAI: reference/models/openai.md - Llama.cpp: reference/models/llamacpp.md + - Transformers: reference/models/transformers.md + - ExllamaV2: reference/models/exllamav2.md + - Mamba: reference/models/mamba.md - API Reference: - api/index.md
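A closing note on the "next-token logits distribution" interface mentioned in welcome.md: everything structured generation needs from a model is the logits over the next token, from which a token is drawn by a softmax followed by one multinomial sample. A library-agnostic sketch of that single step (illustrative only, not Outlines' actual code):

```python
import math
import random

def sample_next_token(logits, rng=random.Random(0)):
    # Numerically stable softmax over the logits...
    m = max(logits)
    weights = [math.exp(l - m) for l in logits]
    # ...then one multinomial draw over the vocabulary
    return rng.choices(range(len(logits)), weights=weights, k=1)[0]

token_id = sample_next_token([0.1, 2.0, -1.0])
print(token_id)
```

Structured generation hooks in just before the draw, masking out the logits of tokens the regex, grammar, or schema disallows.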