From c1ec04a13395bedac66afa46d85010623e47de45 Mon Sep 17 00:00:00 2001
From: Zain Hoda <7146154+zainhoda@users.noreply.github.com>
Date: Mon, 8 Jan 2024 14:13:06 -0500
Subject: [PATCH 1/3] Add Marqo

---
 src/vanna/marqo/__init__.py |   0
 src/vanna/marqo/marqo.py    | 166 ++++++++++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+)
 create mode 100644 src/vanna/marqo/__init__.py
 create mode 100644 src/vanna/marqo/marqo.py

diff --git a/src/vanna/marqo/__init__.py b/src/vanna/marqo/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/vanna/marqo/marqo.py b/src/vanna/marqo/marqo.py
new file mode 100644
index 00000000..168745c7
--- /dev/null
+++ b/src/vanna/marqo/marqo.py
@@ -0,0 +1,166 @@
+import json
+import uuid
+from abc import abstractmethod
+
+import marqo
+
+import pandas as pd
+
+from ..base import VannaBase
+
+
+class Marqo_VectorStore(VannaBase):
+    def __init__(self, config=None):
+        VannaBase.__init__(self, config=config)
+        
+        if config is not None and "marqo_url" in config:
+            marqo_url = config["marqo_url"]
+        else:
+            marqo_url = "http://localhost:8882"
+
+        if config is not None and "marqo_model" in config:
+            marqo_model = config["marqo_model"]
+        else:
+            marqo_model = "hf/all_datasets_v4_MiniLM-L6"
+        
+        self.mq = marqo.Client(url=marqo_url)
+
+        for index in ["vanna-sql", "vanna-ddl", "vanna-doc"]:
+            try:
+                self.mq.create_index(index, model=marqo_model)
+            except Exception as e:
+                print(e)
+                print(f"Marqo index {index} already exists")
+                pass
+
+
+    def generate_embedding(self, data: str, **kwargs) -> list[float]:
+        # Marqo doesn't need to generate embeddings
+        pass        
+
+    def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
+        id = str(uuid.uuid4())+"-sql"
+        question_sql_dict ={
+                "question": question,
+                "sql": sql,
+                "_id": id,
+            }
+
+        self.mq.index("vanna-sql").add_documents(
+            [question_sql_dict],
+            tensor_fields=["question", "sql"],
+        )
+
+        return id
+
+    def add_ddl(self, ddl: str, **kwargs) -> str:
+        id = str(uuid.uuid4())+"-ddl"
+        ddl_dict ={
+                "ddl": ddl,
+                "_id": id,
+            }
+
+        self.mq.index("vanna-ddl").add_documents(
+            [ddl_dict],
+            tensor_fields=["ddl"],
+        )
+        return id
+
+    def add_documentation(self, doc: str, **kwargs) -> str:
+        id = str(uuid.uuid4())+"-doc"
+        doc_dict ={
+                "doc": doc,
+                "_id": id,
+            }
+        
+        self.mq.index("vanna-doc").add_documents(
+            [doc_dict],
+            tensor_fields=["doc"],
+        )
+        return id
+
+    def get_training_data(self, **kwargs) -> pd.DataFrame:
+        data = []  
+
+        for hit in self.mq.index('vanna-doc').search("", limit=1000)['hits']:
+            data.append({
+                "id": hit["_id"],
+                "training_data_type": "documentation",
+                "question": "",
+                "content": hit["doc"],
+            })
+
+        for hit in self.mq.index('vanna-ddl').search("", limit=1000)['hits']:
+            data.append({
+                "id": hit["_id"],
+                "training_data_type": "ddl",
+                "question": "",
+                "content": hit["ddl"],
+            })
+
+        for hit in self.mq.index('vanna-sql').search("", limit=1000)['hits']:
+            data.append({
+                "id": hit["_id"],
+                "training_data_type": "sql",
+                "question": hit["question"],
+                "content": hit["sql"],
+            })
+
+        df = pd.DataFrame(data)
+
+        return df
+
+    def remove_training_data(self, id: str, **kwargs) -> bool:
+        if id.endswith("-sql"):
+            self.mq.index("vanna-sql").delete_documents(ids=[id])
+            return True
+        elif id.endswith("-ddl"):
+            self.mq.index("vanna-ddl").delete_documents(ids=[id])
+            return True
+        elif id.endswith("-doc"):
+            self.mq.index("vanna-doc").delete_documents(ids=[id])
+            return True
+        else:
+            return False
+
+    # Static method to extract the documents from the results of a query
+    @staticmethod
+    def _extract_documents(data) -> list:
+        # Check if 'hits' key is in the dictionary and if it's a list
+        if 'hits' in data and isinstance(data['hits'], list):
+            # Iterate over each item in 'hits'
+
+            if len(data['hits']) == 0:
+                return []
+
+            # If there is a "doc" key, return the value of that key
+            if "doc" in data['hits'][0]:
+                return [hit["doc"] for hit in data['hits']]
+            
+            # If there is a "ddl" key, return the value of that key
+            if "ddl" in data['hits'][0]:
+                return [hit["ddl"] for hit in data['hits']]
+            
+            # Otherwise return the entire hit
+            return [
+                {key: value for key, value in hit.items() if not key.startswith('_')}
+                for hit in data['hits']
+            ]
+        else:
+            # Return an empty list if 'hits' is not found or not a list
+            return []
+
+    def get_similar_question_sql(self, question: str, **kwargs) -> list:
+        return Marqo_VectorStore._extract_documents(
+            self.mq.index('vanna-sql').search(question)
+        )
+
+    def get_related_ddl(self, question: str, **kwargs) -> list:
+        return Marqo_VectorStore._extract_documents(
+            self.mq.index('vanna-ddl').search(question)
+        )
+
+    def get_related_documentation(self, question: str, **kwargs) -> list:
+        return Marqo_VectorStore._extract_documents(
+            self.mq.index('vanna-doc').search(question)
+        )

From 206f2b35082410bc0ecf0155751729deff72374f Mon Sep 17 00:00:00 2001
From: Zain Hoda <7146154+zainhoda@users.noreply.github.com>
Date: Mon, 8 Jan 2024 14:21:54 -0500
Subject: [PATCH 2/3] Add Marqo notebooks

---
 notebooks/bigquery-mistral-chromadb.ipynb                     | 2 +-
 notebooks/bigquery-mistral-marqo.ipynb                        | 1 +
 notebooks/bigquery-mistral-other-vectordb.ipynb               | 2 +-
 notebooks/bigquery-mistral-vannadb.ipynb                      | 2 +-
 notebooks/bigquery-openai-azure-chromadb.ipynb                | 2 +-
 notebooks/bigquery-openai-azure-marqo.ipynb                   | 1 +
 notebooks/bigquery-openai-azure-other-vectordb.ipynb          | 2 +-
 notebooks/bigquery-openai-azure-vannadb.ipynb                 | 2 +-
 notebooks/bigquery-openai-standard-chromadb.ipynb             | 2 +-
 notebooks/bigquery-openai-standard-marqo.ipynb                | 1 +
 notebooks/bigquery-openai-standard-other-vectordb.ipynb       | 2 +-
 notebooks/bigquery-openai-standard-vannadb.ipynb              | 2 +-
 notebooks/bigquery-openai-vanna-chromadb.ipynb                | 2 +-
 notebooks/bigquery-openai-vanna-marqo.ipynb                   | 1 +
 notebooks/bigquery-openai-vanna-other-vectordb.ipynb          | 2 +-
 notebooks/bigquery-openai-vanna-vannadb.ipynb                 | 2 +-
 notebooks/bigquery-other-llm-chromadb.ipynb                   | 2 +-
 notebooks/bigquery-other-llm-marqo.ipynb                      | 1 +
 notebooks/bigquery-other-llm-other-vectordb.ipynb             | 2 +-
 notebooks/bigquery-other-llm-vannadb.ipynb                    | 2 +-
 notebooks/other-database-mistral-chromadb.ipynb               | 2 +-
 notebooks/other-database-mistral-marqo.ipynb                  | 1 +
 notebooks/other-database-mistral-other-vectordb.ipynb         | 2 +-
 notebooks/other-database-mistral-vannadb.ipynb                | 2 +-
 notebooks/other-database-openai-azure-chromadb.ipynb          | 2 +-
 notebooks/other-database-openai-azure-marqo.ipynb             | 1 +
 notebooks/other-database-openai-azure-other-vectordb.ipynb    | 2 +-
 notebooks/other-database-openai-azure-vannadb.ipynb           | 2 +-
 notebooks/other-database-openai-standard-chromadb.ipynb       | 2 +-
 notebooks/other-database-openai-standard-marqo.ipynb          | 1 +
 notebooks/other-database-openai-standard-other-vectordb.ipynb | 2 +-
 notebooks/other-database-openai-standard-vannadb.ipynb        | 2 +-
 notebooks/other-database-openai-vanna-chromadb.ipynb          | 2 +-
 notebooks/other-database-openai-vanna-marqo.ipynb             | 1 +
 notebooks/other-database-openai-vanna-other-vectordb.ipynb    | 2 +-
 notebooks/other-database-openai-vanna-vannadb.ipynb           | 2 +-
 notebooks/other-database-other-llm-chromadb.ipynb             | 2 +-
 notebooks/other-database-other-llm-marqo.ipynb                | 1 +
 notebooks/other-database-other-llm-other-vectordb.ipynb       | 2 +-
 notebooks/other-database-other-llm-vannadb.ipynb              | 2 +-
 notebooks/postgres-mistral-chromadb.ipynb                     | 2 +-
 notebooks/postgres-mistral-marqo.ipynb                        | 1 +
 notebooks/postgres-mistral-other-vectordb.ipynb               | 2 +-
 notebooks/postgres-mistral-vannadb.ipynb                      | 2 +-
 notebooks/postgres-openai-azure-chromadb.ipynb                | 2 +-
 notebooks/postgres-openai-azure-marqo.ipynb                   | 1 +
 notebooks/postgres-openai-azure-other-vectordb.ipynb          | 2 +-
 notebooks/postgres-openai-azure-vannadb.ipynb                 | 2 +-
 notebooks/postgres-openai-standard-chromadb.ipynb             | 2 +-
 notebooks/postgres-openai-standard-marqo.ipynb                | 1 +
 notebooks/postgres-openai-standard-other-vectordb.ipynb       | 2 +-
 notebooks/postgres-openai-standard-vannadb.ipynb              | 2 +-
 notebooks/postgres-openai-vanna-chromadb.ipynb                | 2 +-
 notebooks/postgres-openai-vanna-marqo.ipynb                   | 1 +
 notebooks/postgres-openai-vanna-other-vectordb.ipynb          | 2 +-
 notebooks/postgres-openai-vanna-vannadb.ipynb                 | 2 +-
 notebooks/postgres-other-llm-chromadb.ipynb                   | 2 +-
 notebooks/postgres-other-llm-marqo.ipynb                      | 1 +
 notebooks/postgres-other-llm-other-vectordb.ipynb             | 2 +-
 notebooks/postgres-other-llm-vannadb.ipynb                    | 2 +-
 notebooks/snowflake-mistral-chromadb.ipynb                    | 2 +-
 notebooks/snowflake-mistral-marqo.ipynb                       | 1 +
 notebooks/snowflake-mistral-other-vectordb.ipynb              | 2 +-
 notebooks/snowflake-mistral-vannadb.ipynb                     | 2 +-
 notebooks/snowflake-openai-azure-chromadb.ipynb               | 2 +-
 notebooks/snowflake-openai-azure-marqo.ipynb                  | 1 +
 notebooks/snowflake-openai-azure-other-vectordb.ipynb         | 2 +-
 notebooks/snowflake-openai-azure-vannadb.ipynb                | 2 +-
 notebooks/snowflake-openai-standard-chromadb.ipynb            | 2 +-
 notebooks/snowflake-openai-standard-marqo.ipynb               | 1 +
 notebooks/snowflake-openai-standard-other-vectordb.ipynb      | 2 +-
 notebooks/snowflake-openai-standard-vannadb.ipynb             | 2 +-
 notebooks/snowflake-openai-vanna-chromadb.ipynb               | 2 +-
 notebooks/snowflake-openai-vanna-marqo.ipynb                  | 1 +
 notebooks/snowflake-openai-vanna-other-vectordb.ipynb         | 2 +-
 notebooks/snowflake-openai-vanna-vannadb.ipynb                | 2 +-
 notebooks/snowflake-other-llm-chromadb.ipynb                  | 2 +-
 notebooks/snowflake-other-llm-marqo.ipynb                     | 1 +
 notebooks/snowflake-other-llm-other-vectordb.ipynb            | 2 +-
 notebooks/snowflake-other-llm-vannadb.ipynb                   | 2 +-
 notebooks/sqlite-mistral-chromadb.ipynb                       | 2 +-
 notebooks/sqlite-mistral-marqo.ipynb                          | 1 +
 notebooks/sqlite-mistral-other-vectordb.ipynb                 | 2 +-
 notebooks/sqlite-mistral-vannadb.ipynb                        | 2 +-
 notebooks/sqlite-openai-azure-chromadb.ipynb                  | 2 +-
 notebooks/sqlite-openai-azure-marqo.ipynb                     | 1 +
 notebooks/sqlite-openai-azure-other-vectordb.ipynb            | 2 +-
 notebooks/sqlite-openai-azure-vannadb.ipynb                   | 2 +-
 notebooks/sqlite-openai-standard-chromadb.ipynb               | 2 +-
 notebooks/sqlite-openai-standard-marqo.ipynb                  | 1 +
 notebooks/sqlite-openai-standard-other-vectordb.ipynb         | 2 +-
 notebooks/sqlite-openai-standard-vannadb.ipynb                | 2 +-
 notebooks/sqlite-openai-vanna-chromadb.ipynb                  | 2 +-
 notebooks/sqlite-openai-vanna-marqo.ipynb                     | 1 +
 notebooks/sqlite-openai-vanna-other-vectordb.ipynb            | 2 +-
 notebooks/sqlite-openai-vanna-vannadb.ipynb                   | 2 +-
 notebooks/sqlite-other-llm-chromadb.ipynb                     | 2 +-
 notebooks/sqlite-other-llm-marqo.ipynb                        | 1 +
 notebooks/sqlite-other-llm-other-vectordb.ipynb               | 2 +-
 notebooks/sqlite-other-llm-vannadb.ipynb                      | 2 +-
 pyproject.toml                                                | 3 ++-
 101 files changed, 102 insertions(+), 76 deletions(-)
 create mode 100644 notebooks/bigquery-mistral-marqo.ipynb
 create mode 100644 notebooks/bigquery-openai-azure-marqo.ipynb
 create mode 100644 notebooks/bigquery-openai-standard-marqo.ipynb
 create mode 100644 notebooks/bigquery-openai-vanna-marqo.ipynb
 create mode 100644 notebooks/bigquery-other-llm-marqo.ipynb
 create mode 100644 notebooks/other-database-mistral-marqo.ipynb
 create mode 100644 notebooks/other-database-openai-azure-marqo.ipynb
 create mode 100644 notebooks/other-database-openai-standard-marqo.ipynb
 create mode 100644 notebooks/other-database-openai-vanna-marqo.ipynb
 create mode 100644 notebooks/other-database-other-llm-marqo.ipynb
 create mode 100644 notebooks/postgres-mistral-marqo.ipynb
 create mode 100644 notebooks/postgres-openai-azure-marqo.ipynb
 create mode 100644 notebooks/postgres-openai-standard-marqo.ipynb
 create mode 100644 notebooks/postgres-openai-vanna-marqo.ipynb
 create mode 100644 notebooks/postgres-other-llm-marqo.ipynb
 create mode 100644 notebooks/snowflake-mistral-marqo.ipynb
 create mode 100644 notebooks/snowflake-openai-azure-marqo.ipynb
 create mode 100644 notebooks/snowflake-openai-standard-marqo.ipynb
 create mode 100644 notebooks/snowflake-openai-vanna-marqo.ipynb
 create mode 100644 notebooks/snowflake-other-llm-marqo.ipynb
 create mode 100644 notebooks/sqlite-mistral-marqo.ipynb
 create mode 100644 notebooks/sqlite-openai-azure-marqo.ipynb
 create mode 100644 notebooks/sqlite-openai-standard-marqo.ipynb
 create mode 100644 notebooks/sqlite-openai-vanna-marqo.ipynb
 create mode 100644 notebooks/sqlite-other-llm-marqo.ipynb

diff --git a/notebooks/bigquery-mistral-chromadb.ipynb b/notebooks/bigquery-mistral-chromadb.ipynb
index 73c2a533..11db07f1 100644
--- a/notebooks/bigquery-mistral-chromadb.ipynb
+++ b/notebooks/bigquery-mistral-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "a4b4564c-96e3-553f-bd3a-fd78ff2c05fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a01f90bd-2a30-54e9-b224-6e41192bc874", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "f73ae093-a468-58f7-8928-c8357277a05b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "604afb48-6829-54a6-b55b-447e57aad294", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai,bigquery]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "0a4dc6a1-8cfc-53ae-a9fb-4eca76807451", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "a873b3f2-11d7-585f-b1e8-b119508eaf35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "a4b4564c-96e3-553f-bd3a-fd78ff2c05fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a01f90bd-2a30-54e9-b224-6e41192bc874", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ab7feb46-14af-52ba-a67c-ed97203abb1b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "604afb48-6829-54a6-b55b-447e57aad294", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai,bigquery]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "0a4dc6a1-8cfc-53ae-a9fb-4eca76807451", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "a873b3f2-11d7-585f-b1e8-b119508eaf35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-mistral-marqo.ipynb b/notebooks/bigquery-mistral-marqo.ipynb
new file mode 100644
index 00000000..2d5f48b5
--- /dev/null
+++ b/notebooks/bigquery-mistral-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "fda9bc52-4501-5569-953e-0f1ebff8b5eb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ba570f08-fcb8-5fa8-967b-7c3e400d610d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0ef5320d-f0cc-57b8-993e-f938ff50b6ba", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "fedb304d-43d5-5d20-9a58-9967908a4dc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,mistralai,bigquery]'"}, {"id": "696e9c9c-d724-56db-9a06-10a66dc15b0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo.marqo import Marqo\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "de83d46a-6116-5978-8b9a-f0f586a583bd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, Mistral):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "15b5737f-f7ff-5f9e-89fb-c06431c6a802", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-mistral-other-vectordb.ipynb b/notebooks/bigquery-mistral-other-vectordb.ipynb
index 3806bdcd..aafa6fec 100644
--- a/notebooks/bigquery-mistral-other-vectordb.ipynb
+++ b/notebooks/bigquery-mistral-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "95ac2fed-f042-5ddb-8a43-6055e0eb437b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a90edad1-d1a3-5c7c-83fb-e4455e43ea8c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0dfaee91-c14e-58d8-b1ab-adec336a6277", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "81efadca-5a5f-5a99-8133-b7fa84f17803", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,bigquery]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "8530cf11-bed9-5933-b8ea-644361db0e5e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "95ac2fed-f042-5ddb-8a43-6055e0eb437b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a90edad1-d1a3-5c7c-83fb-e4455e43ea8c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "88901dbc-14f4-5279-a59f-268e126a26f9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "81efadca-5a5f-5a99-8133-b7fa84f17803", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,bigquery]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "8530cf11-bed9-5933-b8ea-644361db0e5e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-mistral-vannadb.ipynb b/notebooks/bigquery-mistral-vannadb.ipynb
index 2a87500c..f8d107b0 100644
--- a/notebooks/bigquery-mistral-vannadb.ipynb
+++ b/notebooks/bigquery-mistral-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "5320f104-0de6-5010-91da-eb74229e24f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c9785756-74dc-5b62-9326-f3fcab07ba39", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "266ccdb8-45fd-583e-8c94-0749162edfa3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "81efadca-5a5f-5a99-8133-b7fa84f17803", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,bigquery]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "2aa3c756-d1ad-5b3c-a161-5bad6f77d594", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "5ea587f2-3901-5ae4-9dff-7ec9a6a5883c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "5320f104-0de6-5010-91da-eb74229e24f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c9785756-74dc-5b62-9326-f3fcab07ba39", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "bcf9504b-741a-5a51-9a38-9fced9141eb4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "81efadca-5a5f-5a99-8133-b7fa84f17803", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,bigquery]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "2aa3c756-d1ad-5b3c-a161-5bad6f77d594", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "5ea587f2-3901-5ae4-9dff-7ec9a6a5883c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-azure-chromadb.ipynb b/notebooks/bigquery-openai-azure-chromadb.ipynb
index 4515c9e8..ddde86dd 100644
--- a/notebooks/bigquery-openai-azure-chromadb.ipynb
+++ b/notebooks/bigquery-openai-azure-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "f439e467-8402-5423-9822-318c50b4831c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "28490bf2-fdd2-54f5-985d-7de2b28c383e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "96e88da1-27bf-55c0-a07a-6734ac70a45d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "8366ca25-d088-55cc-b611-89affa6daef4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "b9cb2809-a933-5cab-a0bb-bf20e4c8aa6d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "f439e467-8402-5423-9822-318c50b4831c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "28490bf2-fdd2-54f5-985d-7de2b28c383e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "6021c0fd-69f9-5099-8a33-221bb2265f9f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "8366ca25-d088-55cc-b611-89affa6daef4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "b9cb2809-a933-5cab-a0bb-bf20e4c8aa6d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-azure-marqo.ipynb b/notebooks/bigquery-openai-azure-marqo.ipynb
new file mode 100644
index 00000000..0ccb295c
--- /dev/null
+++ b/notebooks/bigquery-openai-azure-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "724c58f8-dd5b-5cb7-a57d-e5feb8a34241", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a1de0c40-6cae-5533-b5da-d1c009288950", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "3b154ef8-82dd-5861-b2b4-ae900175e86c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "e679caf9-0f53-5826-b5a3-d407c7b11d0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,openai,bigquery]'"}, {"id": "dc8ae8d1-d08e-5a8c-a806-09ba465d7761", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "c13f4f8a-388e-5e54-968e-86ee639ac7cf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, OpenAI_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "52b44a3f-b512-5800-8148-1ab407ec65da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-azure-other-vectordb.ipynb b/notebooks/bigquery-openai-azure-other-vectordb.ipynb
index cbe8229e..f29ef090 100644
--- a/notebooks/bigquery-openai-azure-other-vectordb.ipynb
+++ b/notebooks/bigquery-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "35185a9e-35a3-56fe-b403-fe55f23a645e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ae9f6a12-733a-59e3-8ea5-6e747247433c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "e091ee15-bdd4-5e5e-a449-273559ead5bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "cc10852e-5853-5c8c-a6bd-0561797b1386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "35185a9e-35a3-56fe-b403-fe55f23a645e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ae9f6a12-733a-59e3-8ea5-6e747247433c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4042d182-e243-5d76-a804-54a96b57e635", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "cc10852e-5853-5c8c-a6bd-0561797b1386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-azure-vannadb.ipynb b/notebooks/bigquery-openai-azure-vannadb.ipynb
index eb8a713b..bcacf17a 100644
--- a/notebooks/bigquery-openai-azure-vannadb.ipynb
+++ b/notebooks/bigquery-openai-azure-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "fac5b46f-4116-58e0-889d-969818e7888b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "909723d5-4778-5fc6-a3c2-1a0753650a1f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0352a46c-ea13-5ed1-95d3-8cbc90da996e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "09d44096-8205-52e1-b761-4f7a6f0ddb2e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "c5473333-5768-5fb8-9247-e26ba37fbf98", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "fac5b46f-4116-58e0-889d-969818e7888b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "909723d5-4778-5fc6-a3c2-1a0753650a1f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "05520bfd-6608-5f27-bac1-90b3d0818583", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "09d44096-8205-52e1-b761-4f7a6f0ddb2e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "c5473333-5768-5fb8-9247-e26ba37fbf98", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-standard-chromadb.ipynb b/notebooks/bigquery-openai-standard-chromadb.ipynb
index a0ced091..1fa7fa8c 100644
--- a/notebooks/bigquery-openai-standard-chromadb.ipynb
+++ b/notebooks/bigquery-openai-standard-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "774bf429-d57a-589e-a818-f746cfbd1333", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c034094a-80ae-5404-9716-f09b9c785fdd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "225b22cd-cc83-592f-8aa9-e7222d40c4fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "28a8d8a1-669f-5f8b-82c4-73a0eb221d64", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "905acd30-8700-5d7e-8602-dd760e75e5df", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "774bf429-d57a-589e-a818-f746cfbd1333", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c034094a-80ae-5404-9716-f09b9c785fdd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "bf62ded0-0c0b-5b96-b2e6-e7a773044ed7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "28a8d8a1-669f-5f8b-82c4-73a0eb221d64", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "905acd30-8700-5d7e-8602-dd760e75e5df", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-standard-marqo.ipynb b/notebooks/bigquery-openai-standard-marqo.ipynb
new file mode 100644
index 00000000..ab62525d
--- /dev/null
+++ b/notebooks/bigquery-openai-standard-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "3e94b5c0-cfeb-5434-bfa1-23663e220b37", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "64d7f2aa-9641-5fba-b399-5f39b8ce0499", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "6063fe15-d88f-5654-8712-20bb3bc48fc4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "e679caf9-0f53-5826-b5a3-d407c7b11d0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,openai,bigquery]'"}, {"id": "dc8ae8d1-d08e-5a8c-a806-09ba465d7761", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "3ce3c4db-08c1-5d0a-86df-2f7b35b82bc2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, OpenAI_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "64419075-650f-55d6-a5e8-4db4a9e32358", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-standard-other-vectordb.ipynb b/notebooks/bigquery-openai-standard-other-vectordb.ipynb
index 9d734c6f..69808418 100644
--- a/notebooks/bigquery-openai-standard-other-vectordb.ipynb
+++ b/notebooks/bigquery-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "e613d63a-b713-506f-8fb3-41b2c0a8e863", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "60917c76-36c7-55b7-b1c4-5c82915d0d08", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "f5dde447-1b11-5e43-af09-d4a5345bacb1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2718a3d3-183c-50b5-b959-be79bd3b071f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "e613d63a-b713-506f-8fb3-41b2c0a8e863", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "60917c76-36c7-55b7-b1c4-5c82915d0d08", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "2c95993b-0824-50f4-8d2e-2418fedfc8fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2718a3d3-183c-50b5-b959-be79bd3b071f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-standard-vannadb.ipynb b/notebooks/bigquery-openai-standard-vannadb.ipynb
index b89ebe34..8a95afdf 100644
--- a/notebooks/bigquery-openai-standard-vannadb.ipynb
+++ b/notebooks/bigquery-openai-standard-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "2475ecc1-295b-55a3-86b7-9b851bec073e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "3229453a-a3f6-5835-bf6b-e921f8abddbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "da558514-b83a-5046-9a4d-f414e5bdc0f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1fc13242-ce0f-53e5-832e-ff33197d2eef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "29194c94-ffb6-5db6-a6f8-18a9ea63e8da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "2475ecc1-295b-55a3-86b7-9b851bec073e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "3229453a-a3f6-5835-bf6b-e921f8abddbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "09614309-a454-5268-911a-11f1690c25e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1fc13242-ce0f-53e5-832e-ff33197d2eef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "29194c94-ffb6-5db6-a6f8-18a9ea63e8da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-vanna-chromadb.ipynb b/notebooks/bigquery-openai-vanna-chromadb.ipynb
index 132fb657..ddef2179 100644
--- a/notebooks/bigquery-openai-vanna-chromadb.ipynb
+++ b/notebooks/bigquery-openai-vanna-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "525bec9c-32bd-5fc9-b211-7909d17a700e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "93465ae8-5642-5cd8-abe5-20cce04e1c4f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "6acfa978-70fd-5fc0-b5cc-1f08e5f9fa2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "4b45f470-52c3-5551-b0f5-28a169a2417c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "525bec9c-32bd-5fc9-b211-7909d17a700e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "93465ae8-5642-5cd8-abe5-20cce04e1c4f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4990aff0-c5b5-569d-b61c-c0adb1bc5aa9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "4b45f470-52c3-5551-b0f5-28a169a2417c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-vanna-marqo.ipynb b/notebooks/bigquery-openai-vanna-marqo.ipynb
new file mode 100644
index 00000000..5cb9ce0a
--- /dev/null
+++ b/notebooks/bigquery-openai-vanna-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "8fadb025-65e0-5133-964e-2e7db62399c5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0a0afd1f-06be-5e48-b6ff-44530dec901d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "fc5d67c6-97b4-5469-880b-8585e7b48226", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3a44bfd8-bc08-51dd-a131-54b24aa974a2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,bigquery]'"}, {"id": "a1877af9-e758-52a4-8f21-3d7b2e0c904f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo.marqo import Marqo\n"}, {"id": "b0e1d67a-58fd-5bcd-b2a2-a13317fda343", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n\nvn = MyVanna()\n"}, {"id": "b46c8c2b-5749-5dd4-a290-c56ad937fadb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-vanna-other-vectordb.ipynb b/notebooks/bigquery-openai-vanna-other-vectordb.ipynb
index 8b80c3f4..14fb3c9a 100644
--- a/notebooks/bigquery-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/bigquery-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "987b3c74-035d-562b-9288-167f1a027019", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8468a145-ef33-5a52-b9dd-f0f5280b0e99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "323a83c6-fff4-503f-bf6e-f67d82ebff62", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "cc057755-c7d2-5e29-8f5d-1c8a5627898b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "987b3c74-035d-562b-9288-167f1a027019", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8468a145-ef33-5a52-b9dd-f0f5280b0e99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4d924c82-d069-5d4b-bc4a-8985f20e4507", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "cc057755-c7d2-5e29-8f5d-1c8a5627898b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-vanna-vannadb.ipynb b/notebooks/bigquery-openai-vanna-vannadb.ipynb
index 74bb1a41..a4b05bdf 100644
--- a/notebooks/bigquery-openai-vanna-vannadb.ipynb
+++ b/notebooks/bigquery-openai-vanna-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "67a26e9c-c6fb-56bc-a8f2-fdcb5294e46e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "07242f18-8018-5856-96bd-02f96b0dd5cc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "da558514-b83a-5046-9a4d-f414e5bdc0f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "118ae34e-7db2-52a8-ba6a-e4cda0c5c684", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "67a26e9c-c6fb-56bc-a8f2-fdcb5294e46e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "07242f18-8018-5856-96bd-02f96b0dd5cc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "09614309-a454-5268-911a-11f1690c25e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "118ae34e-7db2-52a8-ba6a-e4cda0c5c684", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-other-llm-chromadb.ipynb b/notebooks/bigquery-other-llm-chromadb.ipynb
index efde7243..55d6d1da 100644
--- a/notebooks/bigquery-other-llm-chromadb.ipynb
+++ b/notebooks/bigquery-other-llm-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "670a54ff-01bf-5be5-bbd3-b8f7b39b67c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "94a97b34-d9b6-56f6-a3ba-3881ab640c9f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "b537adb9-0ba8-5d4a-b325-e517c6abc2e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "ad2c6517-9727-56fb-befe-a4108859aa3b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "aa232add-2e37-50a8-9e80-c1b5d8a98346", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "670a54ff-01bf-5be5-bbd3-b8f7b39b67c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "94a97b34-d9b6-56f6-a3ba-3881ab640c9f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "c5ec0ab5-b97e-5327-93ed-1bcf19f4e280", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "ad2c6517-9727-56fb-befe-a4108859aa3b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "aa232add-2e37-50a8-9e80-c1b5d8a98346", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-other-llm-marqo.ipynb b/notebooks/bigquery-other-llm-marqo.ipynb
new file mode 100644
index 00000000..42dcfd07
--- /dev/null
+++ b/notebooks/bigquery-other-llm-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "317a612f-84c0-5133-85f1-83088d93d709", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0c856b13-a416-54c7-bee1-22b1644680c0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "804cbc6a-08b9-562e-8376-7876ec24b3de", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3a44bfd8-bc08-51dd-a131-54b24aa974a2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,bigquery]'"}, {"id": "2e26545c-6029-5c62-b146-105086c10f6d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "c1812db9-f2e2-5e21-98c1-d00397d25f65", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(Marqo, MyCustomLLM):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "658d4f52-5b00-5f99-9d7b-65c0be64fc50", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-other-llm-other-vectordb.ipynb b/notebooks/bigquery-other-llm-other-vectordb.ipynb
index 3f0010e6..6330773d 100644
--- a/notebooks/bigquery-other-llm-other-vectordb.ipynb
+++ b/notebooks/bigquery-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "329a34b6-ac61-5e7f-bd53-4dea75316751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "46ab4cb5-529f-5357-b72a-b6d215937d54", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "36ad2412-992c-59fc-8b71-ca4d08ce4853", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "874380de-32c3-5bfc-b26b-3a4d227543fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "329a34b6-ac61-5e7f-bd53-4dea75316751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "46ab4cb5-529f-5357-b72a-b6d215937d54", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "075ce30c-1025-5b09-b870-888aca19466b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "874380de-32c3-5bfc-b26b-3a4d227543fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-other-llm-vannadb.ipynb b/notebooks/bigquery-other-llm-vannadb.ipynb
index 45767602..68bbe204 100644
--- a/notebooks/bigquery-other-llm-vannadb.ipynb
+++ b/notebooks/bigquery-other-llm-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "dab5efa9-fc30-5f4d-ae12-aca6cf81438d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "29381975-fd85-523b-9ede-7dd7b9b69cc0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9a2db276-d7f0-588e-87bb-33a1a6a0277d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8ac7e323-be70-5fee-9d96-69868af0ff99", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "f74a2155-0435-5765-a95e-f89dc789d8f1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "dab5efa9-fc30-5f4d-ae12-aca6cf81438d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "29381975-fd85-523b-9ede-7dd7b9b69cc0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "e533ec83-bcaf-5683-b16f-788617340378", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8ac7e323-be70-5fee-9d96-69868af0ff99", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "f74a2155-0435-5765-a95e-f89dc789d8f1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-mistral-chromadb.ipynb b/notebooks/other-database-mistral-chromadb.ipynb
index fbebbcb8..d4b4a596 100644
--- a/notebooks/other-database-mistral-chromadb.ipynb
+++ b/notebooks/other-database-mistral-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "22323c93-597c-5855-a512-939a14abf9e5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6ae2763b-0004-5c7a-b3ac-a55a75770c34", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "7a8290cb-31c0-5f75-879f-db4b411e0416", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30aef63a-a75b-52bd-8f9c-7fc096e0aba3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "0a4dc6a1-8cfc-53ae-a9fb-4eca76807451", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "2c0b60ff-e9ac-56e1-96c9-004f1ca84273", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "22323c93-597c-5855-a512-939a14abf9e5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6ae2763b-0004-5c7a-b3ac-a55a75770c34", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "e06c544b-8433-5bbd-b0cf-0b9130f134bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30aef63a-a75b-52bd-8f9c-7fc096e0aba3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "0a4dc6a1-8cfc-53ae-a9fb-4eca76807451", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "2c0b60ff-e9ac-56e1-96c9-004f1ca84273", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-mistral-marqo.ipynb b/notebooks/other-database-mistral-marqo.ipynb
new file mode 100644
index 00000000..891225f6
--- /dev/null
+++ b/notebooks/other-database-mistral-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "ca51d95b-179d-5082-896d-247b220d8713", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8cd881d4-867e-522b-9d09-084e04c93f8a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "75c9a403-0a59-57cb-9d7c-4c6ae11ede15", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "ec51e5b5-fc27-5c00-b1a8-6c9dd188e2f5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,mistralai]'"}, {"id": "696e9c9c-d724-56db-9a06-10a66dc15b0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo.marqo import Marqo\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "de83d46a-6116-5978-8b9a-f0f586a583bd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, Mistral):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "9eb495d5-2591-581b-b603-02f04004a4b9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-mistral-other-vectordb.ipynb b/notebooks/other-database-mistral-other-vectordb.ipynb
index 29c8aa0b..f9f1e085 100644
--- a/notebooks/other-database-mistral-other-vectordb.ipynb
+++ b/notebooks/other-database-mistral-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "924358e7-d53d-5554-baff-faa508826010", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "69c22c46-b8d4-5ad1-a81a-f71314449d2a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "55131877-181c-50e0-89e7-5cdd916f7e7b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "2269b218-ec69-598e-a8fa-19a8d7777081", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "924358e7-d53d-5554-baff-faa508826010", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "69c22c46-b8d4-5ad1-a81a-f71314449d2a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "94226258-345b-5cae-b6df-88c323f134d6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "2269b218-ec69-598e-a8fa-19a8d7777081", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-mistral-vannadb.ipynb b/notebooks/other-database-mistral-vannadb.ipynb
index 571e07a9..8c3f1b1e 100644
--- a/notebooks/other-database-mistral-vannadb.ipynb
+++ b/notebooks/other-database-mistral-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "0e71bc98-8716-580a-b971-496b54f1af8e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "08388fc8-a2a0-51ff-a240-e9c7c1921808", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "a5b630f8-2efa-5d47-b541-b1bb4c6f47fa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "2aa3c756-d1ad-5b3c-a161-5bad6f77d594", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "60b67b01-aa53-5466-be45-e3fac6b219d5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "0e71bc98-8716-580a-b971-496b54f1af8e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "08388fc8-a2a0-51ff-a240-e9c7c1921808", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "3ea06d46-34b7-5339-b787-5c3e207a48f6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "2aa3c756-d1ad-5b3c-a161-5bad6f77d594", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "60b67b01-aa53-5466-be45-e3fac6b219d5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-azure-chromadb.ipynb b/notebooks/other-database-openai-azure-chromadb.ipynb
index 5ae385e0..a8299dd2 100644
--- a/notebooks/other-database-openai-azure-chromadb.ipynb
+++ b/notebooks/other-database-openai-azure-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "92ea27dc-5881-5eb6-93c3-455e04899d35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a30bd540-225c-5ca0-9f80-3628945e8366", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "7acad14f-3030-5462-8cbb-7fcd09e23702", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "8366ca25-d088-55cc-b611-89affa6daef4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "9f04a063-5398-5e44-ae93-cdde14aee529", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "92ea27dc-5881-5eb6-93c3-455e04899d35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a30bd540-225c-5ca0-9f80-3628945e8366", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "598f1d53-90e9-5e9c-80c7-e375624cafae", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "8366ca25-d088-55cc-b611-89affa6daef4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "9f04a063-5398-5e44-ae93-cdde14aee529", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-azure-marqo.ipynb b/notebooks/other-database-openai-azure-marqo.ipynb
new file mode 100644
index 00000000..5e402d5a
--- /dev/null
+++ b/notebooks/other-database-openai-azure-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "57d4a531-2711-5199-b845-0b545e6bd26a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "403b36a4-dcbf-591a-8b34-2cee340d8ae4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "1e2930f2-ae73-5c8c-8631-57cc399965a4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3d31b251-dcb5-5c7d-b5da-b692698ecdfa", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,openai]'"}, {"id": "dc8ae8d1-d08e-5a8c-a806-09ba465d7761", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "c13f4f8a-388e-5e54-968e-86ee639ac7cf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, OpenAI_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "da2c0478-b4ad-5741-a502-8db0103d6457", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-azure-other-vectordb.ipynb b/notebooks/other-database-openai-azure-other-vectordb.ipynb
index e07d2245..203e24a4 100644
--- a/notebooks/other-database-openai-azure-other-vectordb.ipynb
+++ b/notebooks/other-database-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "28e43ee2-89a3-5c6e-972c-18e14187ecbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "98bd55a9-8534-57e0-bbf2-790053912d0d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "1c929444-5496-51a1-a50d-573bf8f8bf42", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "06e1d645-97b3-5338-b39a-ed29e0adae10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "28e43ee2-89a3-5c6e-972c-18e14187ecbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "98bd55a9-8534-57e0-bbf2-790053912d0d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "5307d257-caf4-590b-90ed-98348d2066f1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "06e1d645-97b3-5338-b39a-ed29e0adae10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-azure-vannadb.ipynb b/notebooks/other-database-openai-azure-vannadb.ipynb
index 9106a76a..79e645ba 100644
--- a/notebooks/other-database-openai-azure-vannadb.ipynb
+++ b/notebooks/other-database-openai-azure-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "c84b946c-c10e-5f4d-b0e9-1053cac2d9c0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c08f1c4c-ba75-5ac2-882a-ca5ed62a8475", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "501eb87f-96cc-572a-8f3f-c750b45f9a2b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "09d44096-8205-52e1-b761-4f7a6f0ddb2e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "5b6052f4-cae6-5e27-bb45-7e1c0aa43386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "c84b946c-c10e-5f4d-b0e9-1053cac2d9c0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c08f1c4c-ba75-5ac2-882a-ca5ed62a8475", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "c9f0b7cb-f486-57ed-9445-45411e66eb77", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "09d44096-8205-52e1-b761-4f7a6f0ddb2e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "5b6052f4-cae6-5e27-bb45-7e1c0aa43386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-standard-chromadb.ipynb b/notebooks/other-database-openai-standard-chromadb.ipynb
index 3a7bcb1f..397f09d6 100644
--- a/notebooks/other-database-openai-standard-chromadb.ipynb
+++ b/notebooks/other-database-openai-standard-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "deeb1a83-b623-52c8-9e9c-8fde4842d65e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b4e86bda-15bf-591d-88bb-fa94a50b50e0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bcef569-d644-5f3c-917a-8310f43644d5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "28a8d8a1-669f-5f8b-82c4-73a0eb221d64", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "f79e1f30-941b-5975-b947-e84bba787d9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "deeb1a83-b623-52c8-9e9c-8fde4842d65e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b4e86bda-15bf-591d-88bb-fa94a50b50e0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4e1be90d-831a-592e-8f48-4d6d19b409be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "28a8d8a1-669f-5f8b-82c4-73a0eb221d64", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "f79e1f30-941b-5975-b947-e84bba787d9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-standard-marqo.ipynb b/notebooks/other-database-openai-standard-marqo.ipynb
new file mode 100644
index 00000000..e1825764
--- /dev/null
+++ b/notebooks/other-database-openai-standard-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "4fc64584-bab8-5895-a16b-2d54274bc40b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "bb1d1e59-d6cf-5ab3-bcbf-8166b0d8f1c3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "862a7b2b-d7ce-51aa-a02a-08ffe268f8a9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3d31b251-dcb5-5c7d-b5da-b692698ecdfa", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,openai]'"}, {"id": "dc8ae8d1-d08e-5a8c-a806-09ba465d7761", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "3ce3c4db-08c1-5d0a-86df-2f7b35b82bc2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, OpenAI_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "3cc71855-e54f-55f8-8faa-de53f1943c67", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-standard-other-vectordb.ipynb b/notebooks/other-database-openai-standard-other-vectordb.ipynb
index 5fba017f..687c4578 100644
--- a/notebooks/other-database-openai-standard-other-vectordb.ipynb
+++ b/notebooks/other-database-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "83e6caa0-808d-5e3f-a3bc-dd6055253309", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0dc9e571-d346-5296-9c2b-6288e2e7fdfd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "29473aa9-8149-5272-bf3b-e498edbcc2e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "863e74db-ce0d-5c2b-abcf-a57a0583b638", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "83e6caa0-808d-5e3f-a3bc-dd6055253309", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0dc9e571-d346-5296-9c2b-6288e2e7fdfd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "02a256fb-5c94-572c-aa70-8d52c604073b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "863e74db-ce0d-5c2b-abcf-a57a0583b638", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-standard-vannadb.ipynb b/notebooks/other-database-openai-standard-vannadb.ipynb
index 486f3948..b80cb219 100644
--- a/notebooks/other-database-openai-standard-vannadb.ipynb
+++ b/notebooks/other-database-openai-standard-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "46dcb449-a2c9-571d-a7ce-d6450eb19571", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "db8cee28-61fc-5750-bc66-8a3f8e312e9c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "805bcd0a-a97d-55f7-836f-9df9ced3fad4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1fc13242-ce0f-53e5-832e-ff33197d2eef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "00c06f03-db34-5b90-9f5e-d3836f69d656", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "46dcb449-a2c9-571d-a7ce-d6450eb19571", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "db8cee28-61fc-5750-bc66-8a3f8e312e9c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0c3b4092-39bd-5a07-a8de-4c443475c7b4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1fc13242-ce0f-53e5-832e-ff33197d2eef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "00c06f03-db34-5b90-9f5e-d3836f69d656", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-vanna-chromadb.ipynb b/notebooks/other-database-openai-vanna-chromadb.ipynb
index 7edbb012..35f50056 100644
--- a/notebooks/other-database-openai-vanna-chromadb.ipynb
+++ b/notebooks/other-database-openai-vanna-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "0137feba-b7f4-52f8-985b-c86be0e0f5bb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "105b1b11-2b54-59ac-8b07-3fc3536824da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "317fde73-6c98-5e93-86b4-3ccb542cd831", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "2ad31f21-0a91-5647-8f68-469156d9f90c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "0137feba-b7f4-52f8-985b-c86be0e0f5bb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "105b1b11-2b54-59ac-8b07-3fc3536824da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "2ce897eb-059f-5ca9-bbd1-36aaed7c87cf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "2ad31f21-0a91-5647-8f68-469156d9f90c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-vanna-marqo.ipynb b/notebooks/other-database-openai-vanna-marqo.ipynb
new file mode 100644
index 00000000..1a1e09b3
--- /dev/null
+++ b/notebooks/other-database-openai-vanna-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "b3805a05-e5f3-5ea2-9c50-25a1c9a51a75", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "30b9ca1a-c459-53b6-ba57-d8a6bb5cb9fa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "aa4a36b4-5201-5e6e-9f0f-89b2e2b8ffc5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30f0eccb-7684-5642-98ae-31bee8662133", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo]'"}, {"id": "a1877af9-e758-52a4-8f21-3d7b2e0c904f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo.marqo import Marqo\n"}, {"id": "b0e1d67a-58fd-5bcd-b2a2-a13317fda343", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n\nvn = MyVanna()\n"}, {"id": "f2b198ee-2ce7-5aa6-8724-67ce55463cfd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-vanna-other-vectordb.ipynb b/notebooks/other-database-openai-vanna-other-vectordb.ipynb
index 1110e472..ee85ca8d 100644
--- a/notebooks/other-database-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/other-database-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "dea72209-4768-5f52-ba75-ae7671cc46fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a35e6042-02e6-5934-b6c7-45abff11bcef", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "5789a6a3-f0b7-5afb-95d7-f46ebc0ca1eb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "43195316-d36c-517e-a899-130dc36e8acd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "dea72209-4768-5f52-ba75-ae7671cc46fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a35e6042-02e6-5934-b6c7-45abff11bcef", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "31e4174d-4d1d-51db-9a91-37d3ba8a2840", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "43195316-d36c-517e-a899-130dc36e8acd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-vanna-vannadb.ipynb b/notebooks/other-database-openai-vanna-vannadb.ipynb
index 133e1d7b..31e918ab 100644
--- a/notebooks/other-database-openai-vanna-vannadb.ipynb
+++ b/notebooks/other-database-openai-vanna-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "0f6a628d-5dca-58b1-b690-40302dab9bf2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1a507369-8ade-5a6e-b349-9927657cb37f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "805bcd0a-a97d-55f7-836f-9df9ced3fad4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "056357dd-f7cf-599f-b170-2a75609c7f10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "0f6a628d-5dca-58b1-b690-40302dab9bf2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1a507369-8ade-5a6e-b349-9927657cb37f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0c3b4092-39bd-5a07-a8de-4c443475c7b4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "056357dd-f7cf-599f-b170-2a75609c7f10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-other-llm-chromadb.ipynb b/notebooks/other-database-other-llm-chromadb.ipynb
index 6da0ddd5..2a11abf6 100644
--- a/notebooks/other-database-other-llm-chromadb.ipynb
+++ b/notebooks/other-database-other-llm-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "7b709983-57b5-5bc7-940a-bcbf832468c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b348bb01-4210-561c-bbd9-759bb6a11099", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "1ac99bf9-08b8-5d77-82ee-7416409862ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "ad2c6517-9727-56fb-befe-a4108859aa3b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "10714cee-02a9-5d1e-aa5a-841894519801", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "7b709983-57b5-5bc7-940a-bcbf832468c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b348bb01-4210-561c-bbd9-759bb6a11099", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "5cd05dbb-a269-5efa-baa4-1a292b9a4d8c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "ad2c6517-9727-56fb-befe-a4108859aa3b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "10714cee-02a9-5d1e-aa5a-841894519801", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-other-llm-marqo.ipynb b/notebooks/other-database-other-llm-marqo.ipynb
new file mode 100644
index 00000000..6bd23c6a
--- /dev/null
+++ b/notebooks/other-database-other-llm-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "3a87478b-c54b-5bdd-b94f-e7ea9d43e69a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f9acdcfc-f442-571b-9ff3-544bb5a2421e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "cd409d49-fd96-5a72-80f4-ad705ec788b1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30f0eccb-7684-5642-98ae-31bee8662133", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo]'"}, {"id": "2e26545c-6029-5c62-b146-105086c10f6d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "c1812db9-f2e2-5e21-98c1-d00397d25f65", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(Marqo, MyCustomLLM):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "3763c949-be8a-5e13-9b62-80cf3929f5be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-other-llm-other-vectordb.ipynb b/notebooks/other-database-other-llm-other-vectordb.ipynb
index 172ceba4..74632fc6 100644
--- a/notebooks/other-database-other-llm-other-vectordb.ipynb
+++ b/notebooks/other-database-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "d4a7bd4f-e238-5168-8c26-03ab7d9b6ceb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6b7aad07-6701-5974-9b5a-c6ed21cc3eca", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "655215da-b934-5419-a73a-9bf4b77e96d8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "14a246cb-cc7e-5bb1-b931-cf58d8c86b53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "d4a7bd4f-e238-5168-8c26-03ab7d9b6ceb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6b7aad07-6701-5974-9b5a-c6ed21cc3eca", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "445e8689-7756-5dab-8e76-46f1601ed2b1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "14a246cb-cc7e-5bb1-b931-cf58d8c86b53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-other-llm-vannadb.ipynb b/notebooks/other-database-other-llm-vannadb.ipynb
index 684f916a..fffad86f 100644
--- a/notebooks/other-database-other-llm-vannadb.ipynb
+++ b/notebooks/other-database-other-llm-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "8b253300-a4e8-5b94-8ef5-958eefde2756", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "638a9f05-ed43-58ab-aa9b-2f88ddc6a370", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "a6970aff-d479-566f-8fd4-4cd44851ce28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8ac7e323-be70-5fee-9d96-69868af0ff99", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "d2df2e38-f9f0-5483-9181-59cefa62c124", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "8b253300-a4e8-5b94-8ef5-958eefde2756", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "638a9f05-ed43-58ab-aa9b-2f88ddc6a370", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "cb07c322-cd04-54d6-9c9a-7d262dec914f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8ac7e323-be70-5fee-9d96-69868af0ff99", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "d2df2e38-f9f0-5483-9181-59cefa62c124", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-mistral-chromadb.ipynb b/notebooks/postgres-mistral-chromadb.ipynb
index 73666a74..f8573ee6 100644
--- a/notebooks/postgres-mistral-chromadb.ipynb
+++ b/notebooks/postgres-mistral-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "0f3048c8-acae-5ca8-b441-2e74e9190bc2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a03347e7-7aec-59e2-a48b-a8b757da02f7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "89d75cda-e5b9-5911-91ed-0cf5c06d7f69", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "8f8fcaac-414d-5dc4-9d96-2a42ce33b9db", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai,postgres]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "0a4dc6a1-8cfc-53ae-a9fb-4eca76807451", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "3d1b4791-ba66-588e-90d5-81366b367805", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "0f3048c8-acae-5ca8-b441-2e74e9190bc2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a03347e7-7aec-59e2-a48b-a8b757da02f7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "b8762223-46dd-5535-be5b-16e5128ea5c0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "8f8fcaac-414d-5dc4-9d96-2a42ce33b9db", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai,postgres]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "0a4dc6a1-8cfc-53ae-a9fb-4eca76807451", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "3d1b4791-ba66-588e-90d5-81366b367805", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-mistral-marqo.ipynb b/notebooks/postgres-mistral-marqo.ipynb
new file mode 100644
index 00000000..b1b09199
--- /dev/null
+++ b/notebooks/postgres-mistral-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "aa811fa4-4d25-5b33-9da8-e68a69b87e0a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "369fb3d9-2130-513d-af1f-aa2ac6672dc5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "695d1031-2ae4-5061-a36d-23e81a133eb0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "f3cf7c8f-f599-519b-8e2e-33106f15963a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,mistralai,postgres]'"}, {"id": "696e9c9c-d724-56db-9a06-10a66dc15b0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo.marqo import Marqo\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "de83d46a-6116-5978-8b9a-f0f586a583bd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, Mistral):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "74eee46a-7c9e-509d-83ab-c828c12e7d25", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-mistral-other-vectordb.ipynb b/notebooks/postgres-mistral-other-vectordb.ipynb
index b1578699..b3a965cc 100644
--- a/notebooks/postgres-mistral-other-vectordb.ipynb
+++ b/notebooks/postgres-mistral-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "1bf7ca05-0a4b-56c4-8958-79b30e5afc2e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "13f1c9f5-c46b-56da-a35a-83299b1735b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "8de7ee64-7ba3-5daf-860f-d4ba11e68b3f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "72ba7676-e2d8-5a7e-ac6e-f7578822efb5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,postgres]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "ef45e977-adff-5d4f-a769-28be071afb5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "1bf7ca05-0a4b-56c4-8958-79b30e5afc2e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "13f1c9f5-c46b-56da-a35a-83299b1735b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "e8abf792-ee0c-5975-95ff-67d2b0ddbaba", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "72ba7676-e2d8-5a7e-ac6e-f7578822efb5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,postgres]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "ef45e977-adff-5d4f-a769-28be071afb5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-mistral-vannadb.ipynb b/notebooks/postgres-mistral-vannadb.ipynb
index 0a6ecdfb..37211cdf 100644
--- a/notebooks/postgres-mistral-vannadb.ipynb
+++ b/notebooks/postgres-mistral-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "7eb3f36a-c79b-5c0c-8eb6-4601a6c3c634", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e0d82a7a-03af-5945-94b5-d9b71c024f01", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "d602c88f-320a-57e8-85ee-73685b5c9cd1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "72ba7676-e2d8-5a7e-ac6e-f7578822efb5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,postgres]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "2aa3c756-d1ad-5b3c-a161-5bad6f77d594", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "ce84935a-2ca8-5f5b-b3fc-1f0c77380d9a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "7eb3f36a-c79b-5c0c-8eb6-4601a6c3c634", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e0d82a7a-03af-5945-94b5-d9b71c024f01", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "eaaf2b90-2f4f-5c81-ae7b-590fd5be3358", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "72ba7676-e2d8-5a7e-ac6e-f7578822efb5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,postgres]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "2aa3c756-d1ad-5b3c-a161-5bad6f77d594", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "ce84935a-2ca8-5f5b-b3fc-1f0c77380d9a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-azure-chromadb.ipynb b/notebooks/postgres-openai-azure-chromadb.ipynb
index d5a01213..9ab33892 100644
--- a/notebooks/postgres-openai-azure-chromadb.ipynb
+++ b/notebooks/postgres-openai-azure-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "e7cd5976-e784-52c5-be86-b454ffa806c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d69856a3-3ea7-5ca5-8159-040262aa4049", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "7a79bcc1-c725-5378-bb13-75291ac67f99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "8366ca25-d088-55cc-b611-89affa6daef4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "125a30f1-60b6-5ad5-a154-40363adc38d1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "e7cd5976-e784-52c5-be86-b454ffa806c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d69856a3-3ea7-5ca5-8159-040262aa4049", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "8eb5354c-d5fb-5b74-92a2-77b2ced899b0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "8366ca25-d088-55cc-b611-89affa6daef4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "125a30f1-60b6-5ad5-a154-40363adc38d1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-azure-marqo.ipynb b/notebooks/postgres-openai-azure-marqo.ipynb
new file mode 100644
index 00000000..6ce2737a
--- /dev/null
+++ b/notebooks/postgres-openai-azure-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "bd301e2e-9b25-5c91-aebc-1f0c23d76aad", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "11a49ddb-4573-5a2d-8673-58f553dfdb70", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "24d05cd0-80cc-5664-8a9e-6248fc56ffaa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "62983037-10e1-53ea-ac40-912d81a2c447", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,openai,postgres]'"}, {"id": "dc8ae8d1-d08e-5a8c-a806-09ba465d7761", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "c13f4f8a-388e-5e54-968e-86ee639ac7cf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, OpenAI_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "65bd16f4-8fc8-504d-a834-12a24f127c66", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-azure-other-vectordb.ipynb b/notebooks/postgres-openai-azure-other-vectordb.ipynb
index bef054c0..6f0f9489 100644
--- a/notebooks/postgres-openai-azure-other-vectordb.ipynb
+++ b/notebooks/postgres-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "6c5bdd16-d84b-527f-a805-d01f53e7337c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "55d62c18-7f8e-53cf-8a60-fb5abd6efee4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "46f63606-ab8f-5d12-b7a2-ca467c1921e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "e11da77b-6e86-53a2-9c7b-700c2550921e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "6c5bdd16-d84b-527f-a805-d01f53e7337c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "55d62c18-7f8e-53cf-8a60-fb5abd6efee4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4aafd94a-06eb-51e1-8df7-409c90675dc5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "e11da77b-6e86-53a2-9c7b-700c2550921e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-azure-vannadb.ipynb b/notebooks/postgres-openai-azure-vannadb.ipynb
index a8c5544d..8971606a 100644
--- a/notebooks/postgres-openai-azure-vannadb.ipynb
+++ b/notebooks/postgres-openai-azure-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "778e258a-8586-5d24-bde4-0a1a6361f6a9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b707bd2c-b6fa-517e-944f-1f316438c252", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "3f5adb23-3700-54cb-8072-9e953b9c5273", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "09d44096-8205-52e1-b761-4f7a6f0ddb2e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "6aed1fd9-7ffe-503a-bd0d-ee0fb914aeff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "778e258a-8586-5d24-bde4-0a1a6361f6a9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b707bd2c-b6fa-517e-944f-1f316438c252", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "59f5454a-9c3a-5ea2-9d20-7a32ca5813b4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "09d44096-8205-52e1-b761-4f7a6f0ddb2e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "6aed1fd9-7ffe-503a-bd0d-ee0fb914aeff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-standard-chromadb.ipynb b/notebooks/postgres-openai-standard-chromadb.ipynb
index c272ac7c..7eb2f235 100644
--- a/notebooks/postgres-openai-standard-chromadb.ipynb
+++ b/notebooks/postgres-openai-standard-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "95b3f23b-49e5-5f79-bedc-f3e5dea0f14f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a552e618-fefd-5a0d-8005-c47323ee0a3a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "3cfc3d9c-9ca0-55b4-9a78-7c033b5a5bf0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "28a8d8a1-669f-5f8b-82c4-73a0eb221d64", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "69b47dc7-f766-59c7-8f50-3390f2044c0e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "95b3f23b-49e5-5f79-bedc-f3e5dea0f14f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a552e618-fefd-5a0d-8005-c47323ee0a3a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "65fe6254-dbf1-59dc-91c6-fad08e3b05ff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "28a8d8a1-669f-5f8b-82c4-73a0eb221d64", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "69b47dc7-f766-59c7-8f50-3390f2044c0e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-standard-marqo.ipynb b/notebooks/postgres-openai-standard-marqo.ipynb
new file mode 100644
index 00000000..55c81608
--- /dev/null
+++ b/notebooks/postgres-openai-standard-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "82494868-ca31-5a5b-aa10-f5ebf57091af", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "095e6e97-1391-5e97-8901-e7cc282058c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "02dcf380-b5e4-518f-97e7-50dde2f89dca", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "62983037-10e1-53ea-ac40-912d81a2c447", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,openai,postgres]'"}, {"id": "dc8ae8d1-d08e-5a8c-a806-09ba465d7761", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "3ce3c4db-08c1-5d0a-86df-2f7b35b82bc2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, OpenAI_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "89e241fd-d77b-5933-bac7-48d16e01120f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-standard-other-vectordb.ipynb b/notebooks/postgres-openai-standard-other-vectordb.ipynb
index 960118ee..4fa9b847 100644
--- a/notebooks/postgres-openai-standard-other-vectordb.ipynb
+++ b/notebooks/postgres-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "ad6a7ca6-de11-5e6c-accf-b908b3b5f536", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cc22b65c-c118-568d-95b5-320743ba50ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "bca6a0ac-79c7-59c7-8b54-1cba19c49b91", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "b3cf62dc-7c47-56f5-8dd9-65fc75597155", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "ad6a7ca6-de11-5e6c-accf-b908b3b5f536", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cc22b65c-c118-568d-95b5-320743ba50ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "3ce03f11-0930-5754-a09e-dd673300f806", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "b3cf62dc-7c47-56f5-8dd9-65fc75597155", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-standard-vannadb.ipynb b/notebooks/postgres-openai-standard-vannadb.ipynb
index 3f17dffb..153f2523 100644
--- a/notebooks/postgres-openai-standard-vannadb.ipynb
+++ b/notebooks/postgres-openai-standard-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "bd885f8c-8938-5a71-826f-6c4000c70508", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "23336edb-af5c-5cee-a11a-da2a8355603f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee38d28e-9586-5360-a872-95655441a9d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1fc13242-ce0f-53e5-832e-ff33197d2eef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "c6ccc82b-147a-5e2e-937d-74a8dc8c5582", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "bd885f8c-8938-5a71-826f-6c4000c70508", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "23336edb-af5c-5cee-a11a-da2a8355603f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ea6ba7f8-b88b-5281-b2ff-3bf99ac5a721", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1fc13242-ce0f-53e5-832e-ff33197d2eef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "c6ccc82b-147a-5e2e-937d-74a8dc8c5582", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-vanna-chromadb.ipynb b/notebooks/postgres-openai-vanna-chromadb.ipynb
index ed049a81..3462063c 100644
--- a/notebooks/postgres-openai-vanna-chromadb.ipynb
+++ b/notebooks/postgres-openai-vanna-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "df07813d-72a1-5452-8dc9-f6894538a24b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "75fcde7a-5d87-565f-81e2-8270d4b4be99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "24cf5a7e-e723-58d1-ad4e-b28ead2724aa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "febd7f76-f6f4-570d-a21f-a522c19a44dd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "df07813d-72a1-5452-8dc9-f6894538a24b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "75fcde7a-5d87-565f-81e2-8270d4b4be99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "b013b6eb-ae18-5d49-995a-c462458c0904", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "febd7f76-f6f4-570d-a21f-a522c19a44dd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-vanna-marqo.ipynb b/notebooks/postgres-openai-vanna-marqo.ipynb
new file mode 100644
index 00000000..d299f982
--- /dev/null
+++ b/notebooks/postgres-openai-vanna-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "351a9c1a-ccd5-571d-8d48-cd42b3cfa2d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c9208da9-f260-5e4f-9713-1f0f7f0ed03b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "c4b953fc-fa5a-5907-8ed5-756efdb2e6ba", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "2f878a3b-afef-5bb3-9a04-195b941368dc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,postgres]'"}, {"id": "a1877af9-e758-52a4-8f21-3d7b2e0c904f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo.marqo import Marqo\n"}, {"id": "b0e1d67a-58fd-5bcd-b2a2-a13317fda343", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n\nvn = MyVanna()\n"}, {"id": "ee3d3a89-89b0-513d-95da-7d02da979861", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-vanna-other-vectordb.ipynb b/notebooks/postgres-openai-vanna-other-vectordb.ipynb
index 2a2a5219..0042ebfc 100644
--- a/notebooks/postgres-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/postgres-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "c6b9a340-0204-5267-b461-47450cdb8a2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "502499f2-8200-5bdf-bc7d-4bd15b1e7f63", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0bd9a7e5-7502-5050-a738-fca444680f71", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "ee11dcde-9152-5e53-860a-bb72016db15b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "c6b9a340-0204-5267-b461-47450cdb8a2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "502499f2-8200-5bdf-bc7d-4bd15b1e7f63", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "adec8661-7621-5bfe-a9aa-762ff25e5846", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "ee11dcde-9152-5e53-860a-bb72016db15b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-vanna-vannadb.ipynb b/notebooks/postgres-openai-vanna-vannadb.ipynb
index 7ae4f784..3d16835b 100644
--- a/notebooks/postgres-openai-vanna-vannadb.ipynb
+++ b/notebooks/postgres-openai-vanna-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "ff3e46c3-5735-53fe-86ba-dd1ca947e4ba", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c263c951-b756-57eb-a66a-ef636ff0a696", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee38d28e-9586-5360-a872-95655441a9d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "73461d40-259b-59ae-a794-2fbc1806e412", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "ff3e46c3-5735-53fe-86ba-dd1ca947e4ba", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c263c951-b756-57eb-a66a-ef636ff0a696", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ea6ba7f8-b88b-5281-b2ff-3bf99ac5a721", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "73461d40-259b-59ae-a794-2fbc1806e412", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-other-llm-chromadb.ipynb b/notebooks/postgres-other-llm-chromadb.ipynb
index 42767402..13884b0d 100644
--- a/notebooks/postgres-other-llm-chromadb.ipynb
+++ b/notebooks/postgres-other-llm-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "30e88248-26ea-53ad-93a6-9b3d5da41033", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0dc3de1e-d806-5f7c-86aa-f0a764bbb64c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9c8da62e-70bb-5a3b-b468-25c7275aa943", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "ad2c6517-9727-56fb-befe-a4108859aa3b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "2436d4ef-a066-5584-b97d-9696c8bbb9ec", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "30e88248-26ea-53ad-93a6-9b3d5da41033", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0dc3de1e-d806-5f7c-86aa-f0a764bbb64c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "58e05bbe-f915-5ff5-abb9-e039319e458e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "ad2c6517-9727-56fb-befe-a4108859aa3b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "2436d4ef-a066-5584-b97d-9696c8bbb9ec", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-other-llm-marqo.ipynb b/notebooks/postgres-other-llm-marqo.ipynb
new file mode 100644
index 00000000..a39ee160
--- /dev/null
+++ b/notebooks/postgres-other-llm-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "47e6700f-2fb1-5518-bd4f-d8f1ab43e77e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "bf373d88-64a8-5f57-9f12-1238aba1bdfc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "dd109030-882d-50b1-a642-608c68e204d7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "2f878a3b-afef-5bb3-9a04-195b941368dc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,postgres]'"}, {"id": "2e26545c-6029-5c62-b146-105086c10f6d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "c1812db9-f2e2-5e21-98c1-d00397d25f65", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(Marqo, MyCustomLLM):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "d827bc89-3225-58f8-8c10-ff82a8a34a58", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-other-llm-other-vectordb.ipynb b/notebooks/postgres-other-llm-other-vectordb.ipynb
index 6efad3e7..f9a41316 100644
--- a/notebooks/postgres-other-llm-other-vectordb.ipynb
+++ b/notebooks/postgres-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "3a23b994-dde6-5290-a4ae-5c0fbc8143d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1a323199-007a-5773-aab8-b6510f11824b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "12091978-155e-5893-843f-42a69071be9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "8af744f9-5cb5-5a01-b58b-d736caac0164", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "3a23b994-dde6-5290-a4ae-5c0fbc8143d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1a323199-007a-5773-aab8-b6510f11824b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "4ce94f6b-39db-5297-af88-8ae081327b78", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "8af744f9-5cb5-5a01-b58b-d736caac0164", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-other-llm-vannadb.ipynb b/notebooks/postgres-other-llm-vannadb.ipynb
index ba8e59c8..feeef1eb 100644
--- a/notebooks/postgres-other-llm-vannadb.ipynb
+++ b/notebooks/postgres-other-llm-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "cd757483-4d52-5a68-94b5-0244087a9cdb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2ba202ba-7325-523a-9717-cb7730ea4253", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "14feae0c-c763-54d1-bf1e-a2b013e18ddd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8ac7e323-be70-5fee-9d96-69868af0ff99", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "7b97e4ca-e994-5cf9-a09a-36a8e12f6abd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "cd757483-4d52-5a68-94b5-0244087a9cdb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2ba202ba-7325-523a-9717-cb7730ea4253", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "a34d3625-80b0-5424-a34e-81d174b7619e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8ac7e323-be70-5fee-9d96-69868af0ff99", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "7b97e4ca-e994-5cf9-a09a-36a8e12f6abd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-mistral-chromadb.ipynb b/notebooks/snowflake-mistral-chromadb.ipynb
index e00f99ae..82bb542c 100644
--- a/notebooks/snowflake-mistral-chromadb.ipynb
+++ b/notebooks/snowflake-mistral-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "581af781-64b0-5419-b34c-f423e998b939", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "028bcaab-c1fe-57d7-ad3d-eae62574637b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "02adbe75-1c74-5303-92ae-811004580c1c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "678cedb3-98fb-53c0-8cfd-c8663ea21113", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai,snowflake]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "0a4dc6a1-8cfc-53ae-a9fb-4eca76807451", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "af027177-7e5c-5b9a-ab52-93e9c0c3e2ff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "581af781-64b0-5419-b34c-f423e998b939", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "028bcaab-c1fe-57d7-ad3d-eae62574637b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "cfd53a0e-d75a-504c-86d2-24e092fb7965", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "678cedb3-98fb-53c0-8cfd-c8663ea21113", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai,snowflake]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "0a4dc6a1-8cfc-53ae-a9fb-4eca76807451", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "af027177-7e5c-5b9a-ab52-93e9c0c3e2ff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-mistral-marqo.ipynb b/notebooks/snowflake-mistral-marqo.ipynb
new file mode 100644
index 00000000..e65d69d8
--- /dev/null
+++ b/notebooks/snowflake-mistral-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "dc380e10-5a07-55b0-b4ff-4c3064aab9e0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "be9a2065-77c5-5ae6-90a4-d388c26f1808", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "85967ce2-512f-5941-8c76-457f873e62cd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "784da0a2-a431-504f-ac94-72367ca7493e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,mistralai,snowflake]'"}, {"id": "696e9c9c-d724-56db-9a06-10a66dc15b0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo.marqo import Marqo\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "de83d46a-6116-5978-8b9a-f0f586a583bd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, Mistral):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "81fdbba7-51d1-56d2-9395-926a058f0ae4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-mistral-other-vectordb.ipynb b/notebooks/snowflake-mistral-other-vectordb.ipynb
index 757f8730..ce8da7a3 100644
--- a/notebooks/snowflake-mistral-other-vectordb.ipynb
+++ b/notebooks/snowflake-mistral-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "bb27d995-aa3f-5004-9976-87e64a33a9d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "59581b59-5760-589b-b8b8-423389281910", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "183935b0-040b-5a08-ab3c-3d950622c2f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "037d295c-00dd-5d10-9d2f-0e2b81de13d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,snowflake]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "9f1fa815-47fd-5a51-a79c-27065a397031", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "bb27d995-aa3f-5004-9976-87e64a33a9d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "59581b59-5760-589b-b8b8-423389281910", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ca149400-a8ed-56d7-9da0-833d08e8bbac", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "037d295c-00dd-5d10-9d2f-0e2b81de13d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,snowflake]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "9f1fa815-47fd-5a51-a79c-27065a397031", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-mistral-vannadb.ipynb b/notebooks/snowflake-mistral-vannadb.ipynb
index 2b32540f..628162c1 100644
--- a/notebooks/snowflake-mistral-vannadb.ipynb
+++ b/notebooks/snowflake-mistral-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "331570b7-be75-5d5f-8cff-7e4570addec8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "30ca42f3-0e6c-5e60-a359-503a87213bf2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "18dcf38e-e5cf-5a29-960b-b6ded0cc771b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "037d295c-00dd-5d10-9d2f-0e2b81de13d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,snowflake]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "2aa3c756-d1ad-5b3c-a161-5bad6f77d594", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "1e5dad10-0000-5918-b510-9aa80c876212", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "331570b7-be75-5d5f-8cff-7e4570addec8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "30ca42f3-0e6c-5e60-a359-503a87213bf2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "f705ed90-989d-59df-9927-e9d515b6f9f5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "037d295c-00dd-5d10-9d2f-0e2b81de13d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,snowflake]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "2aa3c756-d1ad-5b3c-a161-5bad6f77d594", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "1e5dad10-0000-5918-b510-9aa80c876212", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-azure-chromadb.ipynb b/notebooks/snowflake-openai-azure-chromadb.ipynb
index d8432ec5..0b5183f8 100644
--- a/notebooks/snowflake-openai-azure-chromadb.ipynb
+++ b/notebooks/snowflake-openai-azure-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "6287c685-ff0c-5a75-a58f-fd9d47b14fc3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "7c603f36-8039-502a-983d-bf98a618b27a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0d342fd1-a5ff-5d7f-86c4-cad506f84ae4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "8366ca25-d088-55cc-b611-89affa6daef4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "cd3454f0-9566-5b7b-8e28-ca637b1c3964", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "6287c685-ff0c-5a75-a58f-fd9d47b14fc3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "7c603f36-8039-502a-983d-bf98a618b27a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4923d0fe-58f3-52de-8739-2a55b399f5f8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "8366ca25-d088-55cc-b611-89affa6daef4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "cd3454f0-9566-5b7b-8e28-ca637b1c3964", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-azure-marqo.ipynb b/notebooks/snowflake-openai-azure-marqo.ipynb
new file mode 100644
index 00000000..dee7df05
--- /dev/null
+++ b/notebooks/snowflake-openai-azure-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "ea82563f-1e40-530f-b1c1-b49a1ac0f125", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "4aa821af-2cc8-5a11-b109-361e5b3baf55", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "3702212c-6d62-527d-8717-bb696f9ec2ed", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "792a0a6f-671d-59b0-aed2-fd6606acef80", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,openai,snowflake]'"}, {"id": "dc8ae8d1-d08e-5a8c-a806-09ba465d7761", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "c13f4f8a-388e-5e54-968e-86ee639ac7cf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, OpenAI_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "172e3dad-3938-5cbd-8d61-74b25dd54eca", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-azure-other-vectordb.ipynb b/notebooks/snowflake-openai-azure-other-vectordb.ipynb
index d3f1bfb4..6e6d7738 100644
--- a/notebooks/snowflake-openai-azure-other-vectordb.ipynb
+++ b/notebooks/snowflake-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "6aac9f07-876b-5289-9f4e-7524132617ad", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1124d018-1c19-5e7e-9bc5-0ef3f2d34a44", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "44bc4304-7303-5efc-8b46-3c795d4758fa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "a90a47b5-ed37-5e9a-abfb-044d3b6938e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "6aac9f07-876b-5289-9f4e-7524132617ad", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1124d018-1c19-5e7e-9bc5-0ef3f2d34a44", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0952250a-a0b8-57bf-bf8c-09849612f91d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "a90a47b5-ed37-5e9a-abfb-044d3b6938e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-azure-vannadb.ipynb b/notebooks/snowflake-openai-azure-vannadb.ipynb
index 08adf81d..a24b6f34 100644
--- a/notebooks/snowflake-openai-azure-vannadb.ipynb
+++ b/notebooks/snowflake-openai-azure-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "813e792b-6025-521d-acaf-cc20cbd83c99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2e6e05ce-2c8b-524e-9478-07968ff24fe5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "f87cecbf-d98e-5a64-a354-715ab3da3c36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "09d44096-8205-52e1-b761-4f7a6f0ddb2e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "ab74636a-a65f-5a2d-bca2-b59efd9b8e18", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "813e792b-6025-521d-acaf-cc20cbd83c99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2e6e05ce-2c8b-524e-9478-07968ff24fe5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "d605113c-0491-54b5-aa59-f2e311dff1ec", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "09d44096-8205-52e1-b761-4f7a6f0ddb2e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "ab74636a-a65f-5a2d-bca2-b59efd9b8e18", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-standard-chromadb.ipynb b/notebooks/snowflake-openai-standard-chromadb.ipynb
index fa9cee2f..e8c8ea6c 100644
--- a/notebooks/snowflake-openai-standard-chromadb.ipynb
+++ b/notebooks/snowflake-openai-standard-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "75481477-4412-582b-aba7-52183e26f37c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "339703f1-d6cd-5c0f-8ac1-c221f0798512", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "a8ac9a78-92a7-56dd-8352-c00f6df060ab", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "28a8d8a1-669f-5f8b-82c4-73a0eb221d64", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "6272d9bb-f7dd-5b43-8db4-64817b295df0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "75481477-4412-582b-aba7-52183e26f37c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "339703f1-d6cd-5c0f-8ac1-c221f0798512", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "b176ff21-c8d5-5333-abe1-50847ddd26b8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "28a8d8a1-669f-5f8b-82c4-73a0eb221d64", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "6272d9bb-f7dd-5b43-8db4-64817b295df0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-standard-marqo.ipynb b/notebooks/snowflake-openai-standard-marqo.ipynb
new file mode 100644
index 00000000..24a5395c
--- /dev/null
+++ b/notebooks/snowflake-openai-standard-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "66a6659a-5332-5d82-9036-3b8d8c3ff05c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "65d93a64-34d2-53ad-9bcd-f50890404297", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "acf1fb3b-d375-55e9-b86b-4391f9ce7659", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "792a0a6f-671d-59b0-aed2-fd6606acef80", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,openai,snowflake]'"}, {"id": "dc8ae8d1-d08e-5a8c-a806-09ba465d7761", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "3ce3c4db-08c1-5d0a-86df-2f7b35b82bc2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, OpenAI_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "d79c8ca8-af6c-5103-a699-66fe5ff8f14a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-standard-other-vectordb.ipynb b/notebooks/snowflake-openai-standard-other-vectordb.ipynb
index c7f6f41f..c8f44a12 100644
--- a/notebooks/snowflake-openai-standard-other-vectordb.ipynb
+++ b/notebooks/snowflake-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "a53aec8f-5a05-50f3-9209-7bbf1dca9e6f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "4486c1b3-6c3e-530c-8386-50ca4b3f99e8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "d80b2999-7fe1-5d05-9db4-bc257c081a7c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "8bbc71c3-929d-5119-9d69-63fa678c23a0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "a53aec8f-5a05-50f3-9209-7bbf1dca9e6f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "4486c1b3-6c3e-530c-8386-50ca4b3f99e8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4854f259-7176-51d2-8b79-46b6f7bd61f4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "8bbc71c3-929d-5119-9d69-63fa678c23a0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-standard-vannadb.ipynb b/notebooks/snowflake-openai-standard-vannadb.ipynb
index e675ca69..ece2b721 100644
--- a/notebooks/snowflake-openai-standard-vannadb.ipynb
+++ b/notebooks/snowflake-openai-standard-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "f13605be-e49c-5be3-87bc-2a90efb57306", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e2b2b876-72a0-5854-9177-5ad9bcc29ef7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "8fb7581d-c7bf-55d2-9df9-94635c47f261", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1fc13242-ce0f-53e5-832e-ff33197d2eef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2ebd09c1-87dd-501f-a64a-f1404386b4f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "f13605be-e49c-5be3-87bc-2a90efb57306", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e2b2b876-72a0-5854-9177-5ad9bcc29ef7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "f95c3155-2705-5a3f-b76c-8a56037f9090", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1fc13242-ce0f-53e5-832e-ff33197d2eef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2ebd09c1-87dd-501f-a64a-f1404386b4f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-vanna-chromadb.ipynb b/notebooks/snowflake-openai-vanna-chromadb.ipynb
index 2d3fdbd4..12c7c31e 100644
--- a/notebooks/snowflake-openai-vanna-chromadb.ipynb
+++ b/notebooks/snowflake-openai-vanna-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "e493593c-e8c4-5cdd-bdb8-e5d8bb39b0c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "21979433-946d-5051-90ef-4b087189e318", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ecdf606b-37b1-57e1-a8c1-39ec21da67f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "fdfb6c1f-9b01-5034-9aa5-75cd2e199d28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "e493593c-e8c4-5cdd-bdb8-e5d8bb39b0c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "21979433-946d-5051-90ef-4b087189e318", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "21bab113-7c3d-5f1d-a64d-fe68315482f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "fdfb6c1f-9b01-5034-9aa5-75cd2e199d28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-vanna-marqo.ipynb b/notebooks/snowflake-openai-vanna-marqo.ipynb
new file mode 100644
index 00000000..9040d28f
--- /dev/null
+++ b/notebooks/snowflake-openai-vanna-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "89fd2ba0-45a4-5e18-b00c-20d30d963c5e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ee4afbb6-cc60-578b-bba5-10f44e347353", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "d2174c43-def5-5e58-b9b6-2abbf9f9c1e5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "19c92e55-83d0-5678-9b07-769285f5859f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,snowflake]'"}, {"id": "a1877af9-e758-52a4-8f21-3d7b2e0c904f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo.marqo import Marqo\n"}, {"id": "b0e1d67a-58fd-5bcd-b2a2-a13317fda343", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n\nvn = MyVanna()\n"}, {"id": "b7aafa49-7db5-5a0a-91f7-c04f439e83c4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-vanna-other-vectordb.ipynb b/notebooks/snowflake-openai-vanna-other-vectordb.ipynb
index 59e42db5..87d16c85 100644
--- a/notebooks/snowflake-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/snowflake-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "42212054-35d6-56f3-be8e-47af265d9df9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c38dad5b-51c7-5b49-9e21-6dbace6b923c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "bb99a675-f2f6-5840-9f13-13982b887387", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "deada75c-50bd-5cd8-95ff-e187e794d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "42212054-35d6-56f3-be8e-47af265d9df9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c38dad5b-51c7-5b49-9e21-6dbace6b923c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bd85a70-8b95-53e5-94c0-7d2bbbf45c58", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "deada75c-50bd-5cd8-95ff-e187e794d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-vanna-vannadb.ipynb b/notebooks/snowflake-openai-vanna-vannadb.ipynb
index 1cc657fc..53d11477 100644
--- a/notebooks/snowflake-openai-vanna-vannadb.ipynb
+++ b/notebooks/snowflake-openai-vanna-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "94653bec-534d-5627-9309-0f5d8df292eb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e3e271b5-29aa-5174-b69e-2260293a6c0a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "8fb7581d-c7bf-55d2-9df9-94635c47f261", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "2ff941e7-752b-5c1c-bdf2-d3ab0b17cb48", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "94653bec-534d-5627-9309-0f5d8df292eb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e3e271b5-29aa-5174-b69e-2260293a6c0a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "f95c3155-2705-5a3f-b76c-8a56037f9090", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "2ff941e7-752b-5c1c-bdf2-d3ab0b17cb48", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-other-llm-chromadb.ipynb b/notebooks/snowflake-other-llm-chromadb.ipynb
index d4ab62b5..3a298d62 100644
--- a/notebooks/snowflake-other-llm-chromadb.ipynb
+++ b/notebooks/snowflake-other-llm-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "f755ebca-fde7-5eb1-802d-2c830e0b6282", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0ceeb6ff-3607-5b14-9238-000548eb8fcd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "765e0228-2970-5cd0-b71c-7880112c121b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "ad2c6517-9727-56fb-befe-a4108859aa3b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "fbd8aa4c-3dcb-52af-90a7-f358238c13b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "f755ebca-fde7-5eb1-802d-2c830e0b6282", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0ceeb6ff-3607-5b14-9238-000548eb8fcd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "a6775d4d-7265-5e53-b5ae-3cfa855b66cb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "ad2c6517-9727-56fb-befe-a4108859aa3b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "fbd8aa4c-3dcb-52af-90a7-f358238c13b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-other-llm-marqo.ipynb b/notebooks/snowflake-other-llm-marqo.ipynb
new file mode 100644
index 00000000..e9cfb72c
--- /dev/null
+++ b/notebooks/snowflake-other-llm-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "6b2a5e56-05a4-5866-ba96-3dd081faf78a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "aef88a00-30a8-567a-8b2d-f6e2abdbca26", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "53b28366-9f79-5e70-bd2b-38bafaefff1c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "19c92e55-83d0-5678-9b07-769285f5859f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,snowflake]'"}, {"id": "2e26545c-6029-5c62-b146-105086c10f6d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "c1812db9-f2e2-5e21-98c1-d00397d25f65", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(Marqo, MyCustomLLM):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "c96d846a-4ae7-5d10-9b45-73850616fbae", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-other-llm-other-vectordb.ipynb b/notebooks/snowflake-other-llm-other-vectordb.ipynb
index f581a12f..c444e3f4 100644
--- a/notebooks/snowflake-other-llm-other-vectordb.ipynb
+++ b/notebooks/snowflake-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "cb9146eb-a0a0-5e0f-82b7-59e92f069498", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dcd9d3c5-e561-5d70-a775-f0f40a772553", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "1577379b-aaa2-5b38-b4de-eeb16e98bcaf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "c1fd8a1f-0db6-5067-af77-776edcb4ed06", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "cb9146eb-a0a0-5e0f-82b7-59e92f069498", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dcd9d3c5-e561-5d70-a775-f0f40a772553", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "c3cda295-5709-5e62-a60a-842a5fbeea56", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "c1fd8a1f-0db6-5067-af77-776edcb4ed06", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-other-llm-vannadb.ipynb b/notebooks/snowflake-other-llm-vannadb.ipynb
index 0c3b8d9b..f6aa59c2 100644
--- a/notebooks/snowflake-other-llm-vannadb.ipynb
+++ b/notebooks/snowflake-other-llm-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "5dcc604a-caa9-5f6d-ab0e-3f417516c076", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "aadb008e-18eb-5637-b13d-7a2626e8573d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "76440ab2-1a0b-5d31-a223-58bc3bd7a8fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8ac7e323-be70-5fee-9d96-69868af0ff99", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "4442a4e5-f9cb-5f37-bf17-c51c08b5a35b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "5dcc604a-caa9-5f6d-ab0e-3f417516c076", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "aadb008e-18eb-5637-b13d-7a2626e8573d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "f3f996e3-860c-5f39-99d0-d5be662724cf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8ac7e323-be70-5fee-9d96-69868af0ff99", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "4442a4e5-f9cb-5f37-bf17-c51c08b5a35b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-mistral-chromadb.ipynb b/notebooks/sqlite-mistral-chromadb.ipynb
index df05b83c..b6d925a3 100644
--- a/notebooks/sqlite-mistral-chromadb.ipynb
+++ b/notebooks/sqlite-mistral-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "b901f917-7d1d-5429-b7f3-628b1f239f8d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "540b0186-561a-5cfa-817b-86688e213f3e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "f67b8274-c4b6-555a-a12c-a4fb0bad0e4d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30aef63a-a75b-52bd-8f9c-7fc096e0aba3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "0a4dc6a1-8cfc-53ae-a9fb-4eca76807451", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "a46a1092-3b46-50a0-bf0b-15cabe6acf0e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "b901f917-7d1d-5429-b7f3-628b1f239f8d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "540b0186-561a-5cfa-817b-86688e213f3e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "7e4a551e-f854-5476-90ae-bece9174dda0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30aef63a-a75b-52bd-8f9c-7fc096e0aba3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "0a4dc6a1-8cfc-53ae-a9fb-4eca76807451", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "a46a1092-3b46-50a0-bf0b-15cabe6acf0e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-mistral-marqo.ipynb b/notebooks/sqlite-mistral-marqo.ipynb
new file mode 100644
index 00000000..1dba2488
--- /dev/null
+++ b/notebooks/sqlite-mistral-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "a6943a3f-7b29-5bf1-b38e-0435de3c1f13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "13c67deb-266d-5e80-a282-f743005fe2e0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "d76f2144-03ef-5dfa-a0bf-58440ac7799c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "ec51e5b5-fc27-5c00-b1a8-6c9dd188e2f5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,mistralai]'"}, {"id": "696e9c9c-d724-56db-9a06-10a66dc15b0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo.marqo import Marqo\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "de83d46a-6116-5978-8b9a-f0f586a583bd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, Mistral):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "7aa1dfbd-b8fb-529e-afca-c8521bcf44ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-mistral-other-vectordb.ipynb b/notebooks/sqlite-mistral-other-vectordb.ipynb
index 0b339b17..5126de48 100644
--- a/notebooks/sqlite-mistral-other-vectordb.ipynb
+++ b/notebooks/sqlite-mistral-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "0aeab80e-f7e9-5ae8-967e-72d92129951d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2ece6ad9-270f-59ff-a4aa-db2654c5eafd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "5f5028e3-8438-5909-a9c0-d8f2db011844", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "0c312e61-5705-58ef-8d4c-2e9d0e29c6c1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "0aeab80e-f7e9-5ae8-967e-72d92129951d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2ece6ad9-270f-59ff-a4aa-db2654c5eafd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "a390e116-d2ef-5b0a-b9bf-2453b1edc365", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "0c312e61-5705-58ef-8d4c-2e9d0e29c6c1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-mistral-vannadb.ipynb b/notebooks/sqlite-mistral-vannadb.ipynb
index 6fec8066..bccd8ff2 100644
--- a/notebooks/sqlite-mistral-vannadb.ipynb
+++ b/notebooks/sqlite-mistral-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "5c6445ea-8f7a-5824-b2df-adc2e1e7f405", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cd81f971-231c-5588-ade6-ba27ec4cbd26", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "18a145ff-b10a-5c6f-a734-82c0fc68dce9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "2aa3c756-d1ad-5b3c-a161-5bad6f77d594", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "3d804717-e109-5360-9f2d-98de92bf94c5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "5c6445ea-8f7a-5824-b2df-adc2e1e7f405", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cd81f971-231c-5588-ade6-ba27ec4cbd26", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "2aa4d769-b9da-5d9c-94b4-7e045e627441", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "2aa3c756-d1ad-5b3c-a161-5bad6f77d594", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "3d804717-e109-5360-9f2d-98de92bf94c5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-azure-chromadb.ipynb b/notebooks/sqlite-openai-azure-chromadb.ipynb
index d8d145b6..33c1d3ce 100644
--- a/notebooks/sqlite-openai-azure-chromadb.ipynb
+++ b/notebooks/sqlite-openai-azure-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "262d99dc-b1f5-52a2-b3dd-c959896d40d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b988d8ae-969c-5d2d-9548-d79ab9566238", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "af9b8e47-251a-57c4-bbf8-b44aff8fd7d4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "8366ca25-d088-55cc-b611-89affa6daef4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "860d03fb-aa03-53d2-b703-3ca6f77232b0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "262d99dc-b1f5-52a2-b3dd-c959896d40d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b988d8ae-969c-5d2d-9548-d79ab9566238", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "5fda8cf1-1b74-59c8-beb9-14efb0907167", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "8366ca25-d088-55cc-b611-89affa6daef4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "860d03fb-aa03-53d2-b703-3ca6f77232b0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-azure-marqo.ipynb b/notebooks/sqlite-openai-azure-marqo.ipynb
new file mode 100644
index 00000000..d06bf1db
--- /dev/null
+++ b/notebooks/sqlite-openai-azure-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "831c5414-60d7-548f-933c-a363ff7d1031", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2a2e13d1-9783-58bc-94aa-71e814b971a8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0245269a-3f34-545b-b7a1-adaa2bdb5d7e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3d31b251-dcb5-5c7d-b5da-b692698ecdfa", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,openai]'"}, {"id": "dc8ae8d1-d08e-5a8c-a806-09ba465d7761", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "c13f4f8a-388e-5e54-968e-86ee639ac7cf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, OpenAI_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "c4c2f9ab-3bda-5cbe-a631-070f5c21b730", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-azure-other-vectordb.ipynb b/notebooks/sqlite-openai-azure-other-vectordb.ipynb
index 5f32e90b..41c29d73 100644
--- a/notebooks/sqlite-openai-azure-other-vectordb.ipynb
+++ b/notebooks/sqlite-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "48659c68-dbb3-5f89-a750-75a3bd1d4872", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ab32bcb2-1993-56d5-886f-8262fde1de35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "80ef7877-e065-5927-8d3d-3c7a24883e17", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "7ac5fa06-681a-5c2f-abdc-8541cfe6f770", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "48659c68-dbb3-5f89-a750-75a3bd1d4872", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ab32bcb2-1993-56d5-886f-8262fde1de35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "1f1b6446-b752-5a49-b398-ea551f8dd52b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "7ac5fa06-681a-5c2f-abdc-8541cfe6f770", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-azure-vannadb.ipynb b/notebooks/sqlite-openai-azure-vannadb.ipynb
index a9e0cd74..9d72fdb9 100644
--- a/notebooks/sqlite-openai-azure-vannadb.ipynb
+++ b/notebooks/sqlite-openai-azure-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "79b5f2a1-f4ac-5c48-9451-4d4c5e9bbb4d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "09f17b31-ae67-5c56-a75e-86914502d751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "c919366c-b0da-5326-9b4f-a5e5ee71b7be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "09d44096-8205-52e1-b761-4f7a6f0ddb2e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "e3bfcfae-1df1-5e15-a4f1-e49fa8aed61b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "79b5f2a1-f4ac-5c48-9451-4d4c5e9bbb4d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "09f17b31-ae67-5c56-a75e-86914502d751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "fd291717-1477-5963-92eb-38f3d22c33c4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "09d44096-8205-52e1-b761-4f7a6f0ddb2e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "e3bfcfae-1df1-5e15-a4f1-e49fa8aed61b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-standard-chromadb.ipynb b/notebooks/sqlite-openai-standard-chromadb.ipynb
index d740d31e..a89947cd 100644
--- a/notebooks/sqlite-openai-standard-chromadb.ipynb
+++ b/notebooks/sqlite-openai-standard-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "af8cd30a-415d-5ac9-9511-853d099fca5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "110b678c-bd21-5651-9f10-6049f62e5edc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ba1444db-44d3-5ca7-8461-308b312e053f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "28a8d8a1-669f-5f8b-82c4-73a0eb221d64", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "70963293-29e6-57c4-95f1-b5045d63a75b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "af8cd30a-415d-5ac9-9511-853d099fca5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "110b678c-bd21-5651-9f10-6049f62e5edc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "9de4d57f-8434-54b0-a7c1-93d21e068296", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "28a8d8a1-669f-5f8b-82c4-73a0eb221d64", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "70963293-29e6-57c4-95f1-b5045d63a75b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-standard-marqo.ipynb b/notebooks/sqlite-openai-standard-marqo.ipynb
new file mode 100644
index 00000000..330b3b1e
--- /dev/null
+++ b/notebooks/sqlite-openai-standard-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "1eaacb62-b6c9-5f78-9feb-8444240b5cbe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "5d155d07-2d90-5dfc-b869-1ca6ba2259b9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "08ce4bc7-9011-5067-b634-ae6f1ef24d7c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3d31b251-dcb5-5c7d-b5da-b692698ecdfa", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo,openai]'"}, {"id": "dc8ae8d1-d08e-5a8c-a806-09ba465d7761", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "3ce3c4db-08c1-5d0a-86df-2f7b35b82bc2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo, OpenAI_Chat):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "f652769f-a3a3-5648-85d5-e74cb63c251e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-standard-other-vectordb.ipynb b/notebooks/sqlite-openai-standard-other-vectordb.ipynb
index 83f0ed78..1b47f660 100644
--- a/notebooks/sqlite-openai-standard-other-vectordb.ipynb
+++ b/notebooks/sqlite-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "8fedd282-2590-569a-93ce-ab6c6a4fa48a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2218e0a1-fa4e-56e9-9926-933c4a92043b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "3da5bae9-ef50-5164-9b90-2eeb8c07e96f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "aac12e94-b008-55dc-af6f-3199c9b8fb36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "8fedd282-2590-569a-93ce-ab6c6a4fa48a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2218e0a1-fa4e-56e9-9926-933c4a92043b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "d90017bc-4501-5f30-8c57-142ae218be4e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "aac12e94-b008-55dc-af6f-3199c9b8fb36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-standard-vannadb.ipynb b/notebooks/sqlite-openai-standard-vannadb.ipynb
index 42b256e7..2bfe7dfd 100644
--- a/notebooks/sqlite-openai-standard-vannadb.ipynb
+++ b/notebooks/sqlite-openai-standard-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "078f7efe-c23e-5d4c-98ee-a1d3e014992f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "3b9fc8e7-75e3-5625-b12b-40759de02591", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "394e0f93-821f-5ba7-9cf5-4574fbb026bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1fc13242-ce0f-53e5-832e-ff33197d2eef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2de3b0d3-71e4-5335-9e65-29c27aff8e1d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "078f7efe-c23e-5d4c-98ee-a1d3e014992f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "3b9fc8e7-75e3-5625-b12b-40759de02591", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "fdaba019-3968-51d4-89c1-cdb4d809ea77", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1fc13242-ce0f-53e5-832e-ff33197d2eef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2de3b0d3-71e4-5335-9e65-29c27aff8e1d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-vanna-chromadb.ipynb b/notebooks/sqlite-openai-vanna-chromadb.ipynb
index 41237559..251edb3f 100644
--- a/notebooks/sqlite-openai-vanna-chromadb.ipynb
+++ b/notebooks/sqlite-openai-vanna-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "9d6519a8-e544-5523-82ce-97784be01264", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "96bb2afc-e019-52bb-b5ae-575a74d42bd3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "63b0e89e-0abb-50e3-8485-70155d8aa30b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "13991486-a5af-5687-9351-5b4159aeb502", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "9d6519a8-e544-5523-82ce-97784be01264", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "96bb2afc-e019-52bb-b5ae-575a74d42bd3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "f96244dc-c42f-5cb8-becc-d03958136056", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "13991486-a5af-5687-9351-5b4159aeb502", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-vanna-marqo.ipynb b/notebooks/sqlite-openai-vanna-marqo.ipynb
new file mode 100644
index 00000000..c93e8df1
--- /dev/null
+++ b/notebooks/sqlite-openai-vanna-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "859cb351-0751-5448-a269-505618a2bc89", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "af96ca79-ccab-57c3-a8ef-d222d781d4c1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "d1ab7307-853f-5bf8-a832-452bba751f14", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30f0eccb-7684-5642-98ae-31bee8662133", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo]'"}, {"id": "a1877af9-e758-52a4-8f21-3d7b2e0c904f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.marqo.marqo import Marqo\n"}, {"id": "b0e1d67a-58fd-5bcd-b2a2-a13317fda343", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(Marqo):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n\nvn = MyVanna()\n"}, {"id": "0dff5e7b-d827-51d2-bf3b-21cb3c379ea6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-vanna-other-vectordb.ipynb b/notebooks/sqlite-openai-vanna-other-vectordb.ipynb
index d0746d79..f8965132 100644
--- a/notebooks/sqlite-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/sqlite-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "506bdbf0-9097-5ea8-a3ac-fbbee8ce189c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d0d5c54c-adbb-572b-a2e5-bd708e7d6392", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "da681ba4-1d8d-5ff4-be35-5803a677a21c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "f5348d7a-0fae-5651-880e-1b365a3a1257", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "506bdbf0-9097-5ea8-a3ac-fbbee8ce189c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d0d5c54c-adbb-572b-a2e5-bd708e7d6392", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "5d5bf8e6-1051-5524-a72a-ed51f635cd9a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "f5348d7a-0fae-5651-880e-1b365a3a1257", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-vanna-vannadb.ipynb b/notebooks/sqlite-openai-vanna-vannadb.ipynb
index 9df42a31..f530b5c2 100644
--- a/notebooks/sqlite-openai-vanna-vannadb.ipynb
+++ b/notebooks/sqlite-openai-vanna-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "3b9016b5-2bfd-5bd2-98e7-8ba4a9dec1be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e5b1e80e-8db4-5085-8d4f-6e34ca21d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "394e0f93-821f-5ba7-9cf5-4574fbb026bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "1f3ff57f-259c-548e-bf2c-92170dfcf1b5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "3b9016b5-2bfd-5bd2-98e7-8ba4a9dec1be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e5b1e80e-8db4-5085-8d4f-6e34ca21d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "fdaba019-3968-51d4-89c1-cdb4d809ea77", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "1f3ff57f-259c-548e-bf2c-92170dfcf1b5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-other-llm-chromadb.ipynb b/notebooks/sqlite-other-llm-chromadb.ipynb
index 80eea45f..59a93999 100644
--- a/notebooks/sqlite-other-llm-chromadb.ipynb
+++ b/notebooks/sqlite-other-llm-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "d3c0d01c-97e4-50e9-8e05-32bbed37f5d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a3b250e1-ea89-548a-ac9d-73350818add5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "8b5cba50-be4a-5111-8bbf-c7235bdd38c2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "ad2c6517-9727-56fb-befe-a4108859aa3b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "b2854838-2902-59d3-b55b-bf59f3a4d888", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "d3c0d01c-97e4-50e9-8e05-32bbed37f5d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a3b250e1-ea89-548a-ac9d-73350818add5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "8e191684-d134-527f-b286-03f67c8f961a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "ad2c6517-9727-56fb-befe-a4108859aa3b", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "b2854838-2902-59d3-b55b-bf59f3a4d888", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-other-llm-marqo.ipynb b/notebooks/sqlite-other-llm-marqo.ipynb
new file mode 100644
index 00000000..8a19411c
--- /dev/null
+++ b/notebooks/sqlite-other-llm-marqo.ipynb
@@ -0,0 +1 @@
+{"cells": [{"id": "5a3d07b2-2583-51d2-a8c1-c0321bfc439b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Marqo\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "98059112-2f3f-5c56-b579-836d1282aaec", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "7e07cffc-16a2-53f0-933d-44332ed398d2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30f0eccb-7684-5642-98ae-31bee8662133", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[marqo]'"}, {"id": "2e26545c-6029-5c62-b146-105086c10f6d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.marqo.marqo import Marqo\n"}, {"id": "c1812db9-f2e2-5e21-98c1-d00397d25f65", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(Marqo, MyCustomLLM):\n    def __init__(self, config=None):\n        Marqo.__init__(self, config={'marqo_url': MARQO_URL, 'marqo_model': MARQO_MODEL})\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "22414bc8-e78e-5a42-8591-07389723db9b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-other-llm-other-vectordb.ipynb b/notebooks/sqlite-other-llm-other-vectordb.ipynb
index 51e6a241..3f967feb 100644
--- a/notebooks/sqlite-other-llm-other-vectordb.ipynb
+++ b/notebooks/sqlite-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "e7a07123-59ae-5e9d-9fc4-8606541246e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f8bf2a61-7dfe-598e-8f6b-d29dc7b440ea", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "e34c0da3-6b6a-5a04-aa97-d3dbcef9aec7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "ef8ef020-2101-56df-8aa3-a09caae07971", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "e7a07123-59ae-5e9d-9fc4-8606541246e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f8bf2a61-7dfe-598e-8f6b-d29dc7b440ea", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "51147f2e-bdf1-5d9e-acf5-f079a111d323", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "ef8ef020-2101-56df-8aa3-a09caae07971", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-other-llm-vannadb.ipynb b/notebooks/sqlite-other-llm-vannadb.ipynb
index b98764cb..6ccb1857 100644
--- a/notebooks/sqlite-other-llm-vannadb.ipynb
+++ b/notebooks/sqlite-other-llm-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "22fc9194-d307-554e-84c7-5c61665bd738", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ba3c203c-27c0-589c-82e4-c1411f3af28d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "7090c61a-c77d-565f-92d5-3faf38d8e9bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8ac7e323-be70-5fee-9d96-69868af0ff99", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "b141daf0-a79d-59a4-811c-1626ba88d07a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "22fc9194-d307-554e-84c7-5c61665bd738", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ba3c203c-27c0-589c-82e4-c1411f3af28d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "1d2cf467-7d3c-5bd4-838e-9d462a6e64cb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8ac7e323-be70-5fee-9d96-69868af0ff99", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "b141daf0-a79d-59a4-811c-1626ba88d07a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index ca6454e5..821c4b19 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "vanna"
-version = "0.0.30"
+version = "0.0.31"
 authors = [
   { name="Zain Hoda", email="zain@vanna.ai" },
 ]
@@ -35,3 +35,4 @@ chromadb = ["chromadb"]
 openai = ["openai"]
 mistralai = ["mistralai"]
 gemini = ["google-generativeai"]
+marqo = ["marqo"]

From ff468edbae1f7655d40f081dae18b6b14957cbe7 Mon Sep 17 00:00:00 2001
From: Zain Hoda <7146154+zainhoda@users.noreply.github.com>
Date: Mon, 8 Jan 2024 16:40:27 -0500
Subject: [PATCH 3/3] Fix other vectordb

---
 notebooks/bigquery-mistral-other-vectordb.ipynb               | 2 +-
 notebooks/bigquery-openai-azure-other-vectordb.ipynb          | 2 +-
 notebooks/bigquery-openai-standard-other-vectordb.ipynb       | 2 +-
 notebooks/bigquery-openai-vanna-other-vectordb.ipynb          | 2 +-
 notebooks/bigquery-other-llm-other-vectordb.ipynb             | 2 +-
 notebooks/other-database-mistral-other-vectordb.ipynb         | 2 +-
 notebooks/other-database-openai-azure-other-vectordb.ipynb    | 2 +-
 notebooks/other-database-openai-standard-other-vectordb.ipynb | 2 +-
 notebooks/other-database-openai-vanna-other-vectordb.ipynb    | 2 +-
 notebooks/other-database-other-llm-other-vectordb.ipynb       | 2 +-
 notebooks/postgres-mistral-other-vectordb.ipynb               | 2 +-
 notebooks/postgres-openai-azure-other-vectordb.ipynb          | 2 +-
 notebooks/postgres-openai-standard-other-vectordb.ipynb       | 2 +-
 notebooks/postgres-openai-vanna-other-vectordb.ipynb          | 2 +-
 notebooks/postgres-other-llm-other-vectordb.ipynb             | 2 +-
 notebooks/snowflake-mistral-other-vectordb.ipynb              | 2 +-
 notebooks/snowflake-openai-azure-other-vectordb.ipynb         | 2 +-
 notebooks/snowflake-openai-standard-other-vectordb.ipynb      | 2 +-
 notebooks/snowflake-openai-vanna-other-vectordb.ipynb         | 2 +-
 notebooks/snowflake-other-llm-other-vectordb.ipynb            | 2 +-
 notebooks/sqlite-mistral-other-vectordb.ipynb                 | 2 +-
 notebooks/sqlite-openai-azure-other-vectordb.ipynb            | 2 +-
 notebooks/sqlite-openai-standard-other-vectordb.ipynb         | 2 +-
 notebooks/sqlite-openai-vanna-other-vectordb.ipynb            | 2 +-
 notebooks/sqlite-other-llm-other-vectordb.ipynb               | 2 +-
 25 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/notebooks/bigquery-mistral-other-vectordb.ipynb b/notebooks/bigquery-mistral-other-vectordb.ipynb
index aafa6fec..7af8fa24 100644
--- a/notebooks/bigquery-mistral-other-vectordb.ipynb
+++ b/notebooks/bigquery-mistral-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "95ac2fed-f042-5ddb-8a43-6055e0eb437b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a90edad1-d1a3-5c7c-83fb-e4455e43ea8c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "88901dbc-14f4-5279-a59f-268e126a26f9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "81efadca-5a5f-5a99-8133-b7fa84f17803", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,bigquery]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "8530cf11-bed9-5933-b8ea-644361db0e5e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "95ac2fed-f042-5ddb-8a43-6055e0eb437b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a90edad1-d1a3-5c7c-83fb-e4455e43ea8c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "88901dbc-14f4-5279-a59f-268e126a26f9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "81efadca-5a5f-5a99-8133-b7fa84f17803", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,bigquery]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "edc45a16-5782-5d88-9f4a-c405c1f79dea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "8530cf11-bed9-5933-b8ea-644361db0e5e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-azure-other-vectordb.ipynb b/notebooks/bigquery-openai-azure-other-vectordb.ipynb
index f29ef090..f3a3645e 100644
--- a/notebooks/bigquery-openai-azure-other-vectordb.ipynb
+++ b/notebooks/bigquery-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "35185a9e-35a3-56fe-b403-fe55f23a645e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ae9f6a12-733a-59e3-8ea5-6e747247433c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4042d182-e243-5d76-a804-54a96b57e635", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "cc10852e-5853-5c8c-a6bd-0561797b1386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "35185a9e-35a3-56fe-b403-fe55f23a645e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ae9f6a12-733a-59e3-8ea5-6e747247433c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4042d182-e243-5d76-a804-54a96b57e635", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "2143df43-3835-5c93-b9fb-b61a20492d9c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "cc10852e-5853-5c8c-a6bd-0561797b1386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-standard-other-vectordb.ipynb b/notebooks/bigquery-openai-standard-other-vectordb.ipynb
index 69808418..4508566b 100644
--- a/notebooks/bigquery-openai-standard-other-vectordb.ipynb
+++ b/notebooks/bigquery-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "e613d63a-b713-506f-8fb3-41b2c0a8e863", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "60917c76-36c7-55b7-b1c4-5c82915d0d08", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "2c95993b-0824-50f4-8d2e-2418fedfc8fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2718a3d3-183c-50b5-b959-be79bd3b071f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "e613d63a-b713-506f-8fb3-41b2c0a8e863", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "60917c76-36c7-55b7-b1c4-5c82915d0d08", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "2c95993b-0824-50f4-8d2e-2418fedfc8fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "45f49779-04b9-574f-b573-4f0afb619e7e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2718a3d3-183c-50b5-b959-be79bd3b071f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-vanna-other-vectordb.ipynb b/notebooks/bigquery-openai-vanna-other-vectordb.ipynb
index 14fb3c9a..bfef9310 100644
--- a/notebooks/bigquery-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/bigquery-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "987b3c74-035d-562b-9288-167f1a027019", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8468a145-ef33-5a52-b9dd-f0f5280b0e99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4d924c82-d069-5d4b-bc4a-8985f20e4507", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "cc057755-c7d2-5e29-8f5d-1c8a5627898b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "987b3c74-035d-562b-9288-167f1a027019", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8468a145-ef33-5a52-b9dd-f0f5280b0e99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4d924c82-d069-5d4b-bc4a-8985f20e4507", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "57fbc78c-6a54-5f06-98d8-69f315a380b4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "cc057755-c7d2-5e29-8f5d-1c8a5627898b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-other-llm-other-vectordb.ipynb b/notebooks/bigquery-other-llm-other-vectordb.ipynb
index 6330773d..af6a0038 100644
--- a/notebooks/bigquery-other-llm-other-vectordb.ipynb
+++ b/notebooks/bigquery-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "329a34b6-ac61-5e7f-bd53-4dea75316751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "46ab4cb5-529f-5357-b72a-b6d215937d54", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "075ce30c-1025-5b09-b870-888aca19466b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "874380de-32c3-5bfc-b26b-3a4d227543fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "329a34b6-ac61-5e7f-bd53-4dea75316751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "46ab4cb5-529f-5357-b72a-b6d215937d54", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "075ce30c-1025-5b09-b870-888aca19466b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"bigquery-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "2ff3ca97-cb35-56c7-9263-a3613e4ce005", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "874380de-32c3-5bfc-b26b-3a4d227543fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-mistral-other-vectordb.ipynb b/notebooks/other-database-mistral-other-vectordb.ipynb
index f9f1e085..47a85a9e 100644
--- a/notebooks/other-database-mistral-other-vectordb.ipynb
+++ b/notebooks/other-database-mistral-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "924358e7-d53d-5554-baff-faa508826010", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "69c22c46-b8d4-5ad1-a81a-f71314449d2a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "94226258-345b-5cae-b6df-88c323f134d6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "2269b218-ec69-598e-a8fa-19a8d7777081", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "924358e7-d53d-5554-baff-faa508826010", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "69c22c46-b8d4-5ad1-a81a-f71314449d2a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "94226258-345b-5cae-b6df-88c323f134d6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "edc45a16-5782-5d88-9f4a-c405c1f79dea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "2269b218-ec69-598e-a8fa-19a8d7777081", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-azure-other-vectordb.ipynb b/notebooks/other-database-openai-azure-other-vectordb.ipynb
index 203e24a4..cb431d31 100644
--- a/notebooks/other-database-openai-azure-other-vectordb.ipynb
+++ b/notebooks/other-database-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "28e43ee2-89a3-5c6e-972c-18e14187ecbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "98bd55a9-8534-57e0-bbf2-790053912d0d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "5307d257-caf4-590b-90ed-98348d2066f1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "06e1d645-97b3-5338-b39a-ed29e0adae10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "28e43ee2-89a3-5c6e-972c-18e14187ecbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "98bd55a9-8534-57e0-bbf2-790053912d0d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "5307d257-caf4-590b-90ed-98348d2066f1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "2143df43-3835-5c93-b9fb-b61a20492d9c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "06e1d645-97b3-5338-b39a-ed29e0adae10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-standard-other-vectordb.ipynb b/notebooks/other-database-openai-standard-other-vectordb.ipynb
index 687c4578..8d1123ba 100644
--- a/notebooks/other-database-openai-standard-other-vectordb.ipynb
+++ b/notebooks/other-database-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "83e6caa0-808d-5e3f-a3bc-dd6055253309", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0dc9e571-d346-5296-9c2b-6288e2e7fdfd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "02a256fb-5c94-572c-aa70-8d52c604073b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "863e74db-ce0d-5c2b-abcf-a57a0583b638", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "83e6caa0-808d-5e3f-a3bc-dd6055253309", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0dc9e571-d346-5296-9c2b-6288e2e7fdfd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "02a256fb-5c94-572c-aa70-8d52c604073b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "45f49779-04b9-574f-b573-4f0afb619e7e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "863e74db-ce0d-5c2b-abcf-a57a0583b638", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-vanna-other-vectordb.ipynb b/notebooks/other-database-openai-vanna-other-vectordb.ipynb
index ee85ca8d..e42ec9b6 100644
--- a/notebooks/other-database-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/other-database-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "dea72209-4768-5f52-ba75-ae7671cc46fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a35e6042-02e6-5934-b6c7-45abff11bcef", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "31e4174d-4d1d-51db-9a91-37d3ba8a2840", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "43195316-d36c-517e-a899-130dc36e8acd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "dea72209-4768-5f52-ba75-ae7671cc46fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a35e6042-02e6-5934-b6c7-45abff11bcef", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "31e4174d-4d1d-51db-9a91-37d3ba8a2840", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "57fbc78c-6a54-5f06-98d8-69f315a380b4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "43195316-d36c-517e-a899-130dc36e8acd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-other-llm-other-vectordb.ipynb b/notebooks/other-database-other-llm-other-vectordb.ipynb
index 74632fc6..9a013dbf 100644
--- a/notebooks/other-database-other-llm-other-vectordb.ipynb
+++ b/notebooks/other-database-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "d4a7bd4f-e238-5168-8c26-03ab7d9b6ceb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6b7aad07-6701-5974-9b5a-c6ed21cc3eca", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "445e8689-7756-5dab-8e76-46f1601ed2b1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "14a246cb-cc7e-5bb1-b931-cf58d8c86b53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "d4a7bd4f-e238-5168-8c26-03ab7d9b6ceb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6b7aad07-6701-5974-9b5a-c6ed21cc3eca", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "445e8689-7756-5dab-8e76-46f1601ed2b1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"other-database-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "2ff3ca97-cb35-56c7-9263-a3613e4ce005", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "14a246cb-cc7e-5bb1-b931-cf58d8c86b53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...}  # fill this with your connection details\nconn = ...  # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n    df = pd.read_sql_query(sql, conn)\n    return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-mistral-other-vectordb.ipynb b/notebooks/postgres-mistral-other-vectordb.ipynb
index b3a965cc..330f057b 100644
--- a/notebooks/postgres-mistral-other-vectordb.ipynb
+++ b/notebooks/postgres-mistral-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "1bf7ca05-0a4b-56c4-8958-79b30e5afc2e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "13f1c9f5-c46b-56da-a35a-83299b1735b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "e8abf792-ee0c-5975-95ff-67d2b0ddbaba", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "72ba7676-e2d8-5a7e-ac6e-f7578822efb5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,postgres]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "ef45e977-adff-5d4f-a769-28be071afb5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "1bf7ca05-0a4b-56c4-8958-79b30e5afc2e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "13f1c9f5-c46b-56da-a35a-83299b1735b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "e8abf792-ee0c-5975-95ff-67d2b0ddbaba", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "72ba7676-e2d8-5a7e-ac6e-f7578822efb5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,postgres]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "edc45a16-5782-5d88-9f4a-c405c1f79dea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "ef45e977-adff-5d4f-a769-28be071afb5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-azure-other-vectordb.ipynb b/notebooks/postgres-openai-azure-other-vectordb.ipynb
index 6f0f9489..5cf53dbe 100644
--- a/notebooks/postgres-openai-azure-other-vectordb.ipynb
+++ b/notebooks/postgres-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "6c5bdd16-d84b-527f-a805-d01f53e7337c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "55d62c18-7f8e-53cf-8a60-fb5abd6efee4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4aafd94a-06eb-51e1-8df7-409c90675dc5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "e11da77b-6e86-53a2-9c7b-700c2550921e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "6c5bdd16-d84b-527f-a805-d01f53e7337c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "55d62c18-7f8e-53cf-8a60-fb5abd6efee4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4aafd94a-06eb-51e1-8df7-409c90675dc5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "2143df43-3835-5c93-b9fb-b61a20492d9c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "e11da77b-6e86-53a2-9c7b-700c2550921e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-standard-other-vectordb.ipynb b/notebooks/postgres-openai-standard-other-vectordb.ipynb
index 4fa9b847..2b96c25f 100644
--- a/notebooks/postgres-openai-standard-other-vectordb.ipynb
+++ b/notebooks/postgres-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "ad6a7ca6-de11-5e6c-accf-b908b3b5f536", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cc22b65c-c118-568d-95b5-320743ba50ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "3ce03f11-0930-5754-a09e-dd673300f806", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "b3cf62dc-7c47-56f5-8dd9-65fc75597155", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "ad6a7ca6-de11-5e6c-accf-b908b3b5f536", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cc22b65c-c118-568d-95b5-320743ba50ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "3ce03f11-0930-5754-a09e-dd673300f806", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "45f49779-04b9-574f-b573-4f0afb619e7e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "b3cf62dc-7c47-56f5-8dd9-65fc75597155", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-vanna-other-vectordb.ipynb b/notebooks/postgres-openai-vanna-other-vectordb.ipynb
index 0042ebfc..7cc9a070 100644
--- a/notebooks/postgres-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/postgres-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "c6b9a340-0204-5267-b461-47450cdb8a2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "502499f2-8200-5bdf-bc7d-4bd15b1e7f63", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "adec8661-7621-5bfe-a9aa-762ff25e5846", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "ee11dcde-9152-5e53-860a-bb72016db15b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "c6b9a340-0204-5267-b461-47450cdb8a2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "502499f2-8200-5bdf-bc7d-4bd15b1e7f63", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "adec8661-7621-5bfe-a9aa-762ff25e5846", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "57fbc78c-6a54-5f06-98d8-69f315a380b4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "ee11dcde-9152-5e53-860a-bb72016db15b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-other-llm-other-vectordb.ipynb b/notebooks/postgres-other-llm-other-vectordb.ipynb
index f9a41316..5c21a0dd 100644
--- a/notebooks/postgres-other-llm-other-vectordb.ipynb
+++ b/notebooks/postgres-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "3a23b994-dde6-5290-a4ae-5c0fbc8143d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1a323199-007a-5773-aab8-b6510f11824b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "4ce94f6b-39db-5297-af88-8ae081327b78", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "8af744f9-5cb5-5a01-b58b-d736caac0164", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "3a23b994-dde6-5290-a4ae-5c0fbc8143d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1a323199-007a-5773-aab8-b6510f11824b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "4ce94f6b-39db-5297-af88-8ae081327b78", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"postgres-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "2ff3ca97-cb35-56c7-9263-a3613e4ce005", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "8af744f9-5cb5-5a01-b58b-d736caac0164", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-mistral-other-vectordb.ipynb b/notebooks/snowflake-mistral-other-vectordb.ipynb
index ce8da7a3..7f06a9df 100644
--- a/notebooks/snowflake-mistral-other-vectordb.ipynb
+++ b/notebooks/snowflake-mistral-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "bb27d995-aa3f-5004-9976-87e64a33a9d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "59581b59-5760-589b-b8b8-423389281910", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ca149400-a8ed-56d7-9da0-833d08e8bbac", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "037d295c-00dd-5d10-9d2f-0e2b81de13d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,snowflake]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "9f1fa815-47fd-5a51-a79c-27065a397031", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "bb27d995-aa3f-5004-9976-87e64a33a9d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "59581b59-5760-589b-b8b8-423389281910", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "ca149400-a8ed-56d7-9da0-833d08e8bbac", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "037d295c-00dd-5d10-9d2f-0e2b81de13d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,snowflake]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "edc45a16-5782-5d88-9f4a-c405c1f79dea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "9f1fa815-47fd-5a51-a79c-27065a397031", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-azure-other-vectordb.ipynb b/notebooks/snowflake-openai-azure-other-vectordb.ipynb
index 6e6d7738..0efbfd28 100644
--- a/notebooks/snowflake-openai-azure-other-vectordb.ipynb
+++ b/notebooks/snowflake-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "6aac9f07-876b-5289-9f4e-7524132617ad", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1124d018-1c19-5e7e-9bc5-0ef3f2d34a44", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0952250a-a0b8-57bf-bf8c-09849612f91d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "a90a47b5-ed37-5e9a-abfb-044d3b6938e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "6aac9f07-876b-5289-9f4e-7524132617ad", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1124d018-1c19-5e7e-9bc5-0ef3f2d34a44", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "0952250a-a0b8-57bf-bf8c-09849612f91d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "2143df43-3835-5c93-b9fb-b61a20492d9c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "a90a47b5-ed37-5e9a-abfb-044d3b6938e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-standard-other-vectordb.ipynb b/notebooks/snowflake-openai-standard-other-vectordb.ipynb
index c8f44a12..b8bf7fca 100644
--- a/notebooks/snowflake-openai-standard-other-vectordb.ipynb
+++ b/notebooks/snowflake-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "a53aec8f-5a05-50f3-9209-7bbf1dca9e6f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "4486c1b3-6c3e-530c-8386-50ca4b3f99e8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4854f259-7176-51d2-8b79-46b6f7bd61f4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "8bbc71c3-929d-5119-9d69-63fa678c23a0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "a53aec8f-5a05-50f3-9209-7bbf1dca9e6f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "4486c1b3-6c3e-530c-8386-50ca4b3f99e8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4854f259-7176-51d2-8b79-46b6f7bd61f4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "45f49779-04b9-574f-b573-4f0afb619e7e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "8bbc71c3-929d-5119-9d69-63fa678c23a0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-vanna-other-vectordb.ipynb b/notebooks/snowflake-openai-vanna-other-vectordb.ipynb
index 87d16c85..d1a2a6a2 100644
--- a/notebooks/snowflake-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/snowflake-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "42212054-35d6-56f3-be8e-47af265d9df9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c38dad5b-51c7-5b49-9e21-6dbace6b923c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bd85a70-8b95-53e5-94c0-7d2bbbf45c58", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "deada75c-50bd-5cd8-95ff-e187e794d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "42212054-35d6-56f3-be8e-47af265d9df9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c38dad5b-51c7-5b49-9e21-6dbace6b923c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bd85a70-8b95-53e5-94c0-7d2bbbf45c58", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "57fbc78c-6a54-5f06-98d8-69f315a380b4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "deada75c-50bd-5cd8-95ff-e187e794d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-other-llm-other-vectordb.ipynb b/notebooks/snowflake-other-llm-other-vectordb.ipynb
index c444e3f4..832eb61b 100644
--- a/notebooks/snowflake-other-llm-other-vectordb.ipynb
+++ b/notebooks/snowflake-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "cb9146eb-a0a0-5e0f-82b7-59e92f069498", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dcd9d3c5-e561-5d70-a775-f0f40a772553", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "c3cda295-5709-5e62-a60a-842a5fbeea56", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "c1fd8a1f-0db6-5067-af77-776edcb4ed06", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "cb9146eb-a0a0-5e0f-82b7-59e92f069498", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dcd9d3c5-e561-5d70-a775-f0f40a772553", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "c3cda295-5709-5e62-a60a-842a5fbeea56", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"snowflake-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"snowflake-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "2ff3ca97-cb35-56c7-9263-a3613e4ce005", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "c1fd8a1f-0db6-5067-af77-776edcb4ed06", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n    account=\"myaccount\",\n    username=\"myusername\",\n    password=\"mypassword\",\n    database=\"mydatabase\",\n    role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-mistral-other-vectordb.ipynb b/notebooks/sqlite-mistral-other-vectordb.ipynb
index 5126de48..55cd8c2d 100644
--- a/notebooks/sqlite-mistral-other-vectordb.ipynb
+++ b/notebooks/sqlite-mistral-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "0aeab80e-f7e9-5ae8-967e-72d92129951d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2ece6ad9-270f-59ff-a4aa-db2654c5eafd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "a390e116-d2ef-5b0a-b9bf-2453b1edc365", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "dc2d9cc7-badf-50a4-a832-1f67f2d773fb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "0c312e61-5705-58ef-8d4c-2e9d0e29c6c1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "0aeab80e-f7e9-5ae8-967e-72d92129951d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2ece6ad9-270f-59ff-a4aa-db2654c5eafd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "a390e116-d2ef-5b0a-b9bf-2453b1edc365", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-mistral-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "edc45a16-5782-5d88-9f4a-c405c1f79dea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        Mistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "0c312e61-5705-58ef-8d4c-2e9d0e29c6c1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-azure-other-vectordb.ipynb b/notebooks/sqlite-openai-azure-other-vectordb.ipynb
index 41c29d73..d4910319 100644
--- a/notebooks/sqlite-openai-azure-other-vectordb.ipynb
+++ b/notebooks/sqlite-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "48659c68-dbb3-5f89-a750-75a3bd1d4872", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ab32bcb2-1993-56d5-886f-8262fde1de35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "1f1b6446-b752-5a49-b398-ea551f8dd52b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "b7eb471b-9178-5694-bdab-5d31a85d2557", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "7ac5fa06-681a-5c2f-abdc-8541cfe6f770", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "48659c68-dbb3-5f89-a750-75a3bd1d4872", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ab32bcb2-1993-56d5-886f-8262fde1de35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "1f1b6446-b752-5a49-b398-ea551f8dd52b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-azure-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "2143df43-3835-5c93-b9fb-b61a20492d9c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "7ac5fa06-681a-5c2f-abdc-8541cfe6f770", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-standard-other-vectordb.ipynb b/notebooks/sqlite-openai-standard-other-vectordb.ipynb
index 1b47f660..e7e6012d 100644
--- a/notebooks/sqlite-openai-standard-other-vectordb.ipynb
+++ b/notebooks/sqlite-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "8fedd282-2590-569a-93ce-ab6c6a4fa48a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2218e0a1-fa4e-56e9-9926-933c4a92043b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "d90017bc-4501-5f30-8c57-142ae218be4e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "d40c907e-2db8-5fdb-8c60-26ba62f15a63", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "aac12e94-b008-55dc-af6f-3199c9b8fb36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "8fedd282-2590-569a-93ce-ab6c6a4fa48a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2218e0a1-fa4e-56e9-9926-933c4a92043b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "d90017bc-4501-5f30-8c57-142ae218be4e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-standard-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "45f49779-04b9-574f-b573-4f0afb619e7e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "aac12e94-b008-55dc-af6f-3199c9b8fb36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-vanna-other-vectordb.ipynb b/notebooks/sqlite-openai-vanna-other-vectordb.ipynb
index f8965132..786a4172 100644
--- a/notebooks/sqlite-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/sqlite-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "506bdbf0-9097-5ea8-a3ac-fbbee8ce189c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d0d5c54c-adbb-572b-a2e5-bd708e7d6392", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "5d5bf8e6-1051-5524-a72a-ed51f635cd9a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "f5348d7a-0fae-5651-880e-1b365a3a1257", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "506bdbf0-9097-5ea8-a3ac-fbbee8ce189c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d0d5c54c-adbb-572b-a2e5-bd708e7d6392", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "5d5bf8e6-1051-5524-a72a-ed51f635cd9a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "57fbc78c-6a54-5f06-98d8-69f315a380b4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "f5348d7a-0fae-5651-880e-1b365a3a1257", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-openai-vanna-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-other-llm-other-vectordb.ipynb b/notebooks/sqlite-other-llm-other-vectordb.ipynb
index 3f967feb..fc12e205 100644
--- a/notebooks/sqlite-other-llm-other-vectordb.ipynb
+++ b/notebooks/sqlite-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "e7a07123-59ae-5e9d-9fc4-8606541246e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f8bf2a61-7dfe-598e-8f6b-d29dc7b440ea", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "51147f2e-bdf1-5d9e-acf5-f079a111d323", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "582c8aa0-da04-5d07-bc18-ada355d2c089", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "ef8ef020-2101-56df-8aa3-a09caae07971", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "e7a07123-59ae-5e9d-9fc4-8606541246e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f8bf2a61-7dfe-598e-8f6b-d29dc7b440ea", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which LLM do you want to use?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-openai-vanna-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI via Vanna.AI (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AI for free to generate your queries</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-standard-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">OpenAI</div>\n        <small class=\"w-full\">Use OpenAI with your own API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-openai-azure-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Azure OpenAI</div>\n        <small class=\"w-full\">If you have OpenAI models deployed on Azure</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-mistral-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Mistral via Mistral API</div>\n        <small class=\"w-full\">If you have a Mistral API key</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other LLM</div>\n        <small class=\"w-full\">If you have a different LLM model</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "51147f2e-bdf1-5d9e-acf5-f079a111d323", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Where do you want to store the 'training' data?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <a href=\"sqlite-other-llm-vannadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Vanna Hosted Vector DB (Recommended)</div>\n        <small class=\"w-full\">Use Vanna.AIs hosted vector database (pgvector) for free. This is usable across machines with no additional setup.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-chromadb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">ChromaDB</div>\n        <small class=\"w-full\">Use ChromaDBs open-source vector database for free locally. No additional setup is necessary -- all database files will be created and stored locally.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"sqlite-other-llm-marqo.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Marqo</div>\n        <small class=\"w-full\">Use Marqo locally for free. Requires additional setup. Or use their hosted option.</small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> Other VectorDB</div>\n        <small class=\"w-full\">Use any other vector database. Requires additional setup.</small>\n      </div>\n    </span>\n  </li>\n  \n</ul>\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "2ff3ca97-cb35-56c7-9263-a3613e4ce005", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomVectorDB(VannaBase):\n  def add_ddl(self, ddl: str, **kwargs) -> str:\n     # Implement here\n\n  def add_documentation(self, doc: str, **kwargs) -> str:\n     # Implement here\n\n  def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n     # Implement here\n\n  def get_related_ddl(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_related_documentation(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_similar_question_sql(self, question: str, **kwargs) -> list:\n     # Implement here\n\n  def get_training_data(self, **kwargs) -> pd.DataFrame:\n     # Implement here\n\n  def remove_training_data(id: str, **kwargs) -> bool:\n     # Implement here\n\nclass MyCustomLLM(VannaBase):\n  def __init__(self, config=None):\n    pass\n\n  def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n    # Implement here\n\n  def generate_question(self, sql: str, **kwargs) -> str:\n    # Implement here\n    \n  def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n  \n  def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n    # Implement here\n\n  def submit_prompt(self, prompt, **kwargs) -> str:\n    # Implement here\n            \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n    def __init__(self, config=None):\n        MyCustomVectorDB.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "ef8ef020-2101-56df-8aa3-a09caae07971", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n<h3 class=\"mb-5 text-lg font-medium text-gray-900 dark:text-white\">Which database do you want to query?</h3>\n<ul class=\"grid w-full gap-6 md:grid-cols-2\">\n    \n  <li>\n    <span class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border bg-white p-5 border-blue-600 text-blue-600 dark:bg-gray-800 dark:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\"><span class=\"hidden\">[Selected]</span> SQLite</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </span>\n  </li>\n  \n  <li>\n    <a href=\"snowflake-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Snowflake</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"bigquery-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">BigQuery</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"postgres-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Postgres</div>\n        <small class=\"w-full\"></small>\n      </div>\n    </a>\n  </li>\n    \n  <li>\n    <a href=\"other-database-other-llm-other-vectordb.html\" class=\"inline-flex w-full cursor-pointer items-center justify-between rounded-lg border border-gray-200 bg-white p-5 text-gray-500 hover:bg-gray-100 hover:text-gray-600 peer-checked:border-blue-600 peer-checked:text-blue-600 dark:border-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-gray-300 dark:peer-checked:text-blue-500\">\n      <div class=\"block\">\n        <div class=\"w-full text-lg font-semibold\">Other Database</div>\n        <small class=\"w-full\">Use Vanna to generate queries for any SQL database</small>\n      </div>\n    </a>\n  </li>\n    \n</ul>\n    "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file