diff --git a/notebooks/bigquery-mistral-chromadb.ipynb b/notebooks/bigquery-mistral-chromadb.ipynb new file mode 100644 index 00000000..de42986c --- /dev/null +++ b/notebooks/bigquery-mistral-chromadb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "a4b4564c-96e3-553f-bd3a-fd78ff2c05fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a01f90bd-2a30-54e9-b224-6e41192bc874", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "f73ae093-a468-58f7-8928-c8357277a05b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "604afb48-6829-54a6-b55b-447e57aad294", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai,bigquery]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "df67fe44-2513-5b1b-b26c-9a40090cad0d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "a873b3f2-11d7-585f-b1e8-b119508eaf35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-mistral-other-vectordb.ipynb b/notebooks/bigquery-mistral-other-vectordb.ipynb new file mode 100644 index 00000000..a96202ca --- /dev/null +++ b/notebooks/bigquery-mistral-other-vectordb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "95ac2fed-f042-5ddb-8a43-6055e0eb437b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a90edad1-d1a3-5c7c-83fb-e4455e43ea8c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "0dfaee91-c14e-58d8-b1ab-adec336a6277", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "81efadca-5a5f-5a99-8133-b7fa84f17803", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,bigquery]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "9f2ee853-f355-5fb6-96b9-081087740800", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "8530cf11-bed9-5933-b8ea-644361db0e5e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-mistral-vannadb.ipynb b/notebooks/bigquery-mistral-vannadb.ipynb new file mode 100644 index 00000000..a766dabc --- /dev/null +++ b/notebooks/bigquery-mistral-vannadb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "5320f104-0de6-5010-91da-eb74229e24f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c9785756-74dc-5b62-9326-f3fcab07ba39", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "266ccdb8-45fd-583e-8c94-0749162edfa3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "81efadca-5a5f-5a99-8133-b7fa84f17803", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,bigquery]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "784c1725-e122-5013-af69-136dbf47e712", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "5ea587f2-3901-5ae4-9dff-7ec9a6a5883c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-openai-azure-chromadb.ipynb b/notebooks/bigquery-openai-azure-chromadb.ipynb index 2f1cb670..f39d4156 100644 --- a/notebooks/bigquery-openai-azure-chromadb.ipynb +++ b/notebooks/bigquery-openai-azure-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "f439e467-8402-5423-9822-318c50b4831c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8263890c-aab4-5c63-abdd-bee76e425376", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "96e88da1-27bf-55c0-a07a-6734ac70a45d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "b9cb2809-a933-5cab-a0bb-bf20e4c8aa6d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "f439e467-8402-5423-9822-318c50b4831c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "28490bf2-fdd2-54f5-985d-7de2b28c383e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "96e88da1-27bf-55c0-a07a-6734ac70a45d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "7f7b00e8-0cc7-54ff-abf8-4ea3aaee7a0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "b9cb2809-a933-5cab-a0bb-bf20e4c8aa6d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-openai-azure-other-vectordb.ipynb b/notebooks/bigquery-openai-azure-other-vectordb.ipynb index 5d0d845f..561e70ca 100644 --- a/notebooks/bigquery-openai-azure-other-vectordb.ipynb +++ b/notebooks/bigquery-openai-azure-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "35185a9e-35a3-56fe-b403-fe55f23a645e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "3268a082-6df7-5f38-9b61-346896359675", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "e091ee15-bdd4-5e5e-a449-273559ead5bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "cc10852e-5853-5c8c-a6bd-0561797b1386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "35185a9e-35a3-56fe-b403-fe55f23a645e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ae9f6a12-733a-59e3-8ea5-6e747247433c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "e091ee15-bdd4-5e5e-a449-273559ead5bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "a4b7ba84-ae94-5470-8520-21d06dbee484", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "cc10852e-5853-5c8c-a6bd-0561797b1386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-openai-azure-vannadb.ipynb b/notebooks/bigquery-openai-azure-vannadb.ipynb index 07cf0a80..2efd4f3f 100644 --- a/notebooks/bigquery-openai-azure-vannadb.ipynb +++ b/notebooks/bigquery-openai-azure-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "fac5b46f-4116-58e0-889d-969818e7888b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "356b6fdb-eea5-5e81-8a4b-9037b24db0ab", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "0352a46c-ea13-5ed1-95d3-8cbc90da996e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "d3f4d823-ad6c-575a-9797-666a5dc2b4d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "c5473333-5768-5fb8-9247-e26ba37fbf98", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "fac5b46f-4116-58e0-889d-969818e7888b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "909723d5-4778-5fc6-a3c2-1a0753650a1f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "0352a46c-ea13-5ed1-95d3-8cbc90da996e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1ab3f192-9d0c-53c3-ab61-46bf91f402c7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "c5473333-5768-5fb8-9247-e26ba37fbf98", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-openai-standard-chromadb.ipynb b/notebooks/bigquery-openai-standard-chromadb.ipynb index f7ac8a7b..110f79b9 100644 --- a/notebooks/bigquery-openai-standard-chromadb.ipynb +++ b/notebooks/bigquery-openai-standard-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "774bf429-d57a-589e-a818-f746cfbd1333", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "85aaf126-0a09-5638-873c-7c8a69ab06d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "225b22cd-cc83-592f-8aa9-e7222d40c4fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "381badf7-67a1-5d18-ad08-30ada1767adf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.local import LocalContext_OpenAI\n"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "905acd30-8700-5d7e-8602-dd760e75e5df", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "774bf429-d57a-589e-a818-f746cfbd1333", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c034094a-80ae-5404-9716-f09b9c785fdd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "225b22cd-cc83-592f-8aa9-e7222d40c4fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "0ba8dbbb-e450-5e7f-a6a5-35b3800eb1a2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "905acd30-8700-5d7e-8602-dd760e75e5df", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-openai-standard-other-vectordb.ipynb b/notebooks/bigquery-openai-standard-other-vectordb.ipynb index f2a9c2ee..82e6a9fc 100644 --- a/notebooks/bigquery-openai-standard-other-vectordb.ipynb +++ b/notebooks/bigquery-openai-standard-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "e613d63a-b713-506f-8fb3-41b2c0a8e863", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "06452bd9-d462-5934-9d48-7bcbe1fa7424", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "f5dde447-1b11-5e43-af09-d4a5345bacb1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "2718a3d3-183c-50b5-b959-be79bd3b071f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "e613d63a-b713-506f-8fb3-41b2c0a8e863", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "60917c76-36c7-55b7-b1c4-5c82915d0d08", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "f5dde447-1b11-5e43-af09-d4a5345bacb1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "3e2913dc-6022-5042-a129-1fc67577b2b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2718a3d3-183c-50b5-b959-be79bd3b071f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-openai-standard-vannadb.ipynb b/notebooks/bigquery-openai-standard-vannadb.ipynb index e1b632bf..c3442d97 100644 --- a/notebooks/bigquery-openai-standard-vannadb.ipynb +++ b/notebooks/bigquery-openai-standard-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "2475ecc1-295b-55a3-86b7-9b851bec073e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "eec50e07-2412-577f-9932-4a6e1e82720e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "da558514-b83a-5046-9a4d-f414e5bdc0f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "43b24c20-9cca-55be-8f1e-5cdbff80c916", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n\n"}, {"id": "29194c94-ffb6-5db6-a6f8-18a9ea63e8da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "2475ecc1-295b-55a3-86b7-9b851bec073e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "3229453a-a3f6-5835-bf6b-e921f8abddbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "da558514-b83a-5046-9a4d-f414e5bdc0f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "2a4db58f-b60a-5019-a575-0a3faf4397eb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "29194c94-ffb6-5db6-a6f8-18a9ea63e8da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-openai-vanna-chromadb.ipynb b/notebooks/bigquery-openai-vanna-chromadb.ipynb index 51e84f6f..132fb657 100644 --- a/notebooks/bigquery-openai-vanna-chromadb.ipynb +++ b/notebooks/bigquery-openai-vanna-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "525bec9c-32bd-5fc9-b211-7909d17a700e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "9868a22d-dde5-5db9-9d20-313adfc566bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "6acfa978-70fd-5fc0-b5cc-1f08e5f9fa2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "4b45f470-52c3-5551-b0f5-28a169a2417c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "525bec9c-32bd-5fc9-b211-7909d17a700e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "93465ae8-5642-5cd8-abe5-20cce04e1c4f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "6acfa978-70fd-5fc0-b5cc-1f08e5f9fa2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "4b45f470-52c3-5551-b0f5-28a169a2417c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-openai-vanna-other-vectordb.ipynb b/notebooks/bigquery-openai-vanna-other-vectordb.ipynb index 0f82bb1d..8b80c3f4 100644 --- a/notebooks/bigquery-openai-vanna-other-vectordb.ipynb +++ b/notebooks/bigquery-openai-vanna-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "987b3c74-035d-562b-9288-167f1a027019", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1c681249-9362-583b-88d4-d95605fb5a56", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "323a83c6-fff4-503f-bf6e-f67d82ebff62", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "cc057755-c7d2-5e29-8f5d-1c8a5627898b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "987b3c74-035d-562b-9288-167f1a027019", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8468a145-ef33-5a52-b9dd-f0f5280b0e99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "323a83c6-fff4-503f-bf6e-f67d82ebff62", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "cc057755-c7d2-5e29-8f5d-1c8a5627898b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-openai-vanna-vannadb.ipynb b/notebooks/bigquery-openai-vanna-vannadb.ipynb index c62c0b89..74bb1a41 100644 --- a/notebooks/bigquery-openai-vanna-vannadb.ipynb +++ b/notebooks/bigquery-openai-vanna-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "67a26e9c-c6fb-56bc-a8f2-fdcb5294e46e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "20ae2fd9-aaae-53e3-9f63-14f9f852c6c0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "da558514-b83a-5046-9a4d-f414e5bdc0f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "118ae34e-7db2-52a8-ba6a-e4cda0c5c684", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "67a26e9c-c6fb-56bc-a8f2-fdcb5294e46e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "07242f18-8018-5856-96bd-02f96b0dd5cc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "da558514-b83a-5046-9a4d-f414e5bdc0f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "118ae34e-7db2-52a8-ba6a-e4cda0c5c684", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-other-llm-chromadb.ipynb b/notebooks/bigquery-other-llm-chromadb.ipynb index 2f263a85..d6944425 100644 --- a/notebooks/bigquery-other-llm-chromadb.ipynb +++ b/notebooks/bigquery-other-llm-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "670a54ff-01bf-5be5-bbd3-b8f7b39b67c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "aa67476e-d08b-5d53-bfba-64361994e30d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "b537adb9-0ba8-5d4a-b325-e517c6abc2e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "aa232add-2e37-50a8-9e80-c1b5d8a98346", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "670a54ff-01bf-5be5-bbd3-b8f7b39b67c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "94a97b34-d9b6-56f6-a3ba-3881ab640c9f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "b537adb9-0ba8-5d4a-b325-e517c6abc2e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "162d14f3-c44c-5686-94c5-64298c05456e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "aa232add-2e37-50a8-9e80-c1b5d8a98346", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-other-llm-other-vectordb.ipynb b/notebooks/bigquery-other-llm-other-vectordb.ipynb index 1a821534..0b9b7e15 100644 --- a/notebooks/bigquery-other-llm-other-vectordb.ipynb +++ b/notebooks/bigquery-other-llm-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "329a34b6-ac61-5e7f-bd53-4dea75316751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "4a5c0082-e776-51f7-800f-e2a5d7e427bd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "36ad2412-992c-59fc-8b71-ca4d08ce4853", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "874380de-32c3-5bfc-b26b-3a4d227543fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "329a34b6-ac61-5e7f-bd53-4dea75316751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "46ab4cb5-529f-5357-b72a-b6d215937d54", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "36ad2412-992c-59fc-8b71-ca4d08ce4853", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "b4f1f3ff-ab01-5e8c-b90f-87dc9354394e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n def __init__(self, 
config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "874380de-32c3-5bfc-b26b-3a4d227543fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/bigquery-other-llm-vannadb.ipynb b/notebooks/bigquery-other-llm-vannadb.ipynb index ff47fc2a..4eb20edc 100644 --- a/notebooks/bigquery-other-llm-vannadb.ipynb +++ b/notebooks/bigquery-other-llm-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "dab5efa9-fc30-5f4d-ae12-aca6cf81438d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8a869652-b05a-500c-bbdd-eba7206c348d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "9a2db276-d7f0-588e-87bb-33a1a6a0277d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8cb1a477-45fd-5153-b5c4-92c90f869df4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass RemoteContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = RemoteContext_MyLLM()\n"}, {"id": "f74a2155-0435-5765-a95e-f89dc789d8f1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "dab5efa9-fc30-5f4d-ae12-aca6cf81438d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "29381975-fd85-523b-9ede-7dd7b9b69cc0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "9a2db276-d7f0-588e-87bb-33a1a6a0277d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "31af41ed-c218-5c88-abe2-43880a5fe428", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "f74a2155-0435-5765-a95e-f89dc789d8f1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/other-database-mistral-chromadb.ipynb b/notebooks/other-database-mistral-chromadb.ipynb new file mode 100644 index 00000000..8ef363ec --- /dev/null +++ b/notebooks/other-database-mistral-chromadb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "22323c93-597c-5855-a512-939a14abf9e5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6ae2763b-0004-5c7a-b3ac-a55a75770c34", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "7a8290cb-31c0-5f75-879f-db4b411e0416", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30aef63a-a75b-52bd-8f9c-7fc096e0aba3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "df67fe44-2513-5b1b-b26c-9a40090cad0d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "2c0b60ff-e9ac-56e1-96c9-004f1ca84273", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-mistral-other-vectordb.ipynb b/notebooks/other-database-mistral-other-vectordb.ipynb new file mode 100644 index 00000000..75843734 --- /dev/null +++ b/notebooks/other-database-mistral-other-vectordb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "924358e7-d53d-5554-baff-faa508826010", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) 
including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "69c22c46-b8d4-5ad1-a81a-f71314449d2a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "55131877-181c-50e0-89e7-5cdd916f7e7b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "9f2ee853-f355-5fb6-96b9-081087740800", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "2269b218-ec69-598e-a8fa-19a8d7777081", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-mistral-vannadb.ipynb b/notebooks/other-database-mistral-vannadb.ipynb new file mode 100644 index 00000000..e1589348 --- /dev/null +++ b/notebooks/other-database-mistral-vannadb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "0e71bc98-8716-580a-b971-496b54f1af8e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) 
including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "08388fc8-a2a0-51ff-a240-e9c7c1921808", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "a5b630f8-2efa-5d47-b541-b1bb4c6f47fa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "784c1725-e122-5013-af69-136dbf47e712", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "60b67b01-aa53-5466-be45-e3fac6b219d5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-openai-azure-chromadb.ipynb b/notebooks/other-database-openai-azure-chromadb.ipynb index c09bc4bc..c5c488cb 100644 --- a/notebooks/other-database-openai-azure-chromadb.ipynb +++ b/notebooks/other-database-openai-azure-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "92ea27dc-5881-5eb6-93c3-455e04899d35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + 
LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ac054eb1-0e41-5fb8-b113-8f5917e90af2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "7acad14f-3030-5462-8cbb-7fcd09e23702", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "9f04a063-5398-5e44-ae93-cdde14aee529", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "92ea27dc-5881-5eb6-93c3-455e04899d35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a30bd540-225c-5ca0-9f80-3628945e8366", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "7acad14f-3030-5462-8cbb-7fcd09e23702", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "7f7b00e8-0cc7-54ff-abf8-4ea3aaee7a0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "9f04a063-5398-5e44-ae93-cdde14aee529", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-openai-azure-other-vectordb.ipynb b/notebooks/other-database-openai-azure-other-vectordb.ipynb index 1f36dd59..040a02a6 100644 --- a/notebooks/other-database-openai-azure-other-vectordb.ipynb +++ b/notebooks/other-database-openai-azure-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "28e43ee2-89a3-5c6e-972c-18e14187ecbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to 
generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "adac2cff-cac2-50b1-a3bc-fa33c666c4f3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "1c929444-5496-51a1-a50d-573bf8f8bf42", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "06e1d645-97b3-5338-b39a-ed29e0adae10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "28e43ee2-89a3-5c6e-972c-18e14187ecbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "98bd55a9-8534-57e0-bbf2-790053912d0d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "1c929444-5496-51a1-a50d-573bf8f8bf42", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "a4b7ba84-ae94-5470-8520-21d06dbee484", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "06e1d645-97b3-5338-b39a-ed29e0adae10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-openai-azure-vannadb.ipynb b/notebooks/other-database-openai-azure-vannadb.ipynb index cc5753ec..d3eec944 100644 --- a/notebooks/other-database-openai-azure-vannadb.ipynb +++ b/notebooks/other-database-openai-azure-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "c84b946c-c10e-5f4d-b0e9-1053cac2d9c0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate 
SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a67dcbda-66e8-5475-90f3-0af5f99a50be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "501eb87f-96cc-572a-8f3f-c750b45f9a2b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "d3f4d823-ad6c-575a-9797-666a5dc2b4d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "5b6052f4-cae6-5e27-bb45-7e1c0aa43386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "c84b946c-c10e-5f4d-b0e9-1053cac2d9c0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c08f1c4c-ba75-5ac2-882a-ca5ed62a8475", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "501eb87f-96cc-572a-8f3f-c750b45f9a2b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1ab3f192-9d0c-53c3-ab61-46bf91f402c7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "5b6052f4-cae6-5e27-bb45-7e1c0aa43386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-openai-standard-chromadb.ipynb b/notebooks/other-database-openai-standard-chromadb.ipynb index 74dc2c4f..3d32b61b 100644 --- a/notebooks/other-database-openai-standard-chromadb.ipynb +++ b/notebooks/other-database-openai-standard-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "deeb1a83-b623-52c8-9e9c-8fde4842d65e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG 
+ LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b48e56e0-fa9c-57b1-bb0f-a883627b9b6a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "4bcef569-d644-5f3c-917a-8310f43644d5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "381badf7-67a1-5d18-ad08-30ada1767adf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.local import LocalContext_OpenAI\n"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "f79e1f30-941b-5975-b947-e84bba787d9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "deeb1a83-b623-52c8-9e9c-8fde4842d65e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b4e86bda-15bf-591d-88bb-fa94a50b50e0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "4bcef569-d644-5f3c-917a-8310f43644d5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "0ba8dbbb-e450-5e7f-a6a5-35b3800eb1a2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "f79e1f30-941b-5975-b947-e84bba787d9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-openai-standard-other-vectordb.ipynb b/notebooks/other-database-openai-standard-other-vectordb.ipynb index 204ab0f7..89822714 100644 --- a/notebooks/other-database-openai-standard-other-vectordb.ipynb +++ b/notebooks/other-database-openai-standard-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "83e6caa0-808d-5e3f-a3bc-dd6055253309", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package 
to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f75f42eb-b92e-5d53-bed3-c78023dd83e8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "29473aa9-8149-5272-bf3b-e498edbcc2e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "863e74db-ce0d-5c2b-abcf-a57a0583b638", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "83e6caa0-808d-5e3f-a3bc-dd6055253309", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0dc9e571-d346-5296-9c2b-6288e2e7fdfd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "29473aa9-8149-5272-bf3b-e498edbcc2e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "3e2913dc-6022-5042-a129-1fc67577b2b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "863e74db-ce0d-5c2b-abcf-a57a0583b638", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-openai-standard-vannadb.ipynb b/notebooks/other-database-openai-standard-vannadb.ipynb index 07c0ea69..ebf41115 100644 --- a/notebooks/other-database-openai-standard-vannadb.ipynb +++ b/notebooks/other-database-openai-standard-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "46dcb449-a2c9-571d-a7ce-d6450eb19571", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to 
generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dad326e9-00ee-5e09-b5e4-53dfcf50fd98", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "805bcd0a-a97d-55f7-836f-9df9ced3fad4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "43b24c20-9cca-55be-8f1e-5cdbff80c916", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n\n"}, {"id": "00c06f03-db34-5b90-9f5e-d3836f69d656", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "46dcb449-a2c9-571d-a7ce-d6450eb19571", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "db8cee28-61fc-5750-bc66-8a3f8e312e9c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "805bcd0a-a97d-55f7-836f-9df9ced3fad4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "2a4db58f-b60a-5019-a575-0a3faf4397eb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "00c06f03-db34-5b90-9f5e-d3836f69d656", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-openai-vanna-chromadb.ipynb b/notebooks/other-database-openai-vanna-chromadb.ipynb index de1edee1..7edbb012 100644 --- a/notebooks/other-database-openai-vanna-chromadb.ipynb +++ b/notebooks/other-database-openai-vanna-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "0137feba-b7f4-52f8-985b-c86be0e0f5bb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate 
SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dc98c84d-aefb-58a2-9cfb-212728645d08", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "317fde73-6c98-5e93-86b4-3ccb542cd831", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "2ad31f21-0a91-5647-8f68-469156d9f90c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "0137feba-b7f4-52f8-985b-c86be0e0f5bb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "105b1b11-2b54-59ac-8b07-3fc3536824da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "317fde73-6c98-5e93-86b4-3ccb542cd831", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "2ad31f21-0a91-5647-8f68-469156d9f90c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-openai-vanna-other-vectordb.ipynb b/notebooks/other-database-openai-vanna-other-vectordb.ipynb index e2d96be9..1110e472 100644 --- a/notebooks/other-database-openai-vanna-other-vectordb.ipynb +++ b/notebooks/other-database-openai-vanna-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "dea72209-4768-5f52-ba75-ae7671cc46fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` 
Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a25ae5b7-8434-5986-ac9d-fbbd97e18339", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "5789a6a3-f0b7-5afb-95d7-f46ebc0ca1eb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "43195316-d36c-517e-a899-130dc36e8acd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "dea72209-4768-5f52-ba75-ae7671cc46fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a35e6042-02e6-5934-b6c7-45abff11bcef", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "5789a6a3-f0b7-5afb-95d7-f46ebc0ca1eb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "43195316-d36c-517e-a899-130dc36e8acd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-openai-vanna-vannadb.ipynb b/notebooks/other-database-openai-vanna-vannadb.ipynb index a3fc6c63..133e1d7b 100644 --- a/notebooks/other-database-openai-vanna-vannadb.ipynb +++ b/notebooks/other-database-openai-vanna-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "0f6a628d-5dca-58b1-b690-40302dab9bf2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` 
Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "695081ef-ae50-5a0c-8346-92d5fa7ad91e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "805bcd0a-a97d-55f7-836f-9df9ced3fad4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "056357dd-f7cf-599f-b170-2a75609c7f10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "0f6a628d-5dca-58b1-b690-40302dab9bf2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1a507369-8ade-5a6e-b349-9927657cb37f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "805bcd0a-a97d-55f7-836f-9df9ced3fad4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "056357dd-f7cf-599f-b170-2a75609c7f10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-other-llm-chromadb.ipynb b/notebooks/other-database-other-llm-chromadb.ipynb index 45a0ab23..401ddbf2 100644 --- a/notebooks/other-database-other-llm-chromadb.ipynb +++ b/notebooks/other-database-other-llm-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "7b709983-57b5-5bc7-940a-bcbf832468c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including 
connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "23f36fb4-5f32-5299-b982-3eb907384c18", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "1ac99bf9-08b8-5d77-82ee-7416409862ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "10714cee-02a9-5d1e-aa5a-841894519801", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "7b709983-57b5-5bc7-940a-bcbf832468c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b348bb01-4210-561c-bbd9-759bb6a11099", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "1ac99bf9-08b8-5d77-82ee-7416409862ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n    "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "162d14f3-c44c-5686-94c5-64298c05456e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n    def __init__(self, config=None):\n        pass\n\n    def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n        # Implement here\n\n    def generate_question(self, sql: str, **kwargs) -> str:\n        # Implement here\n        \n    def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n        # Implement here\n        \n    def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n        # Implement here\n\n    def submit_prompt(self, prompt, **kwargs) -> str:\n        # Implement here\n        \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        MyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "10714cee-02a9-5d1e-aa5a-841894519801", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n    CREATE TABLE IF NOT EXISTS my-table (\n        id INT PRIMARY KEY,\n        name VARCHAR(100),\n        age INT\n    )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-other-llm-other-vectordb.ipynb b/notebooks/other-database-other-llm-other-vectordb.ipynb index bd7df920..6b608204 100644 --- a/notebooks/other-database-other-llm-other-vectordb.ipynb +++ b/notebooks/other-database-other-llm-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "d4a7bd4f-e238-5168-8c26-03ab7d9b6ceb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL 
using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2f4ef27d-7fa0-542e-8ced-e0868950fc7e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "655215da-b934-5419-a73a-9bf4b77e96d8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "14a246cb-cc7e-5bb1-b931-cf58d8c86b53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "d4a7bd4f-e238-5168-8c26-03ab7d9b6ceb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6b7aad07-6701-5974-9b5a-c6ed21cc3eca", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "655215da-b934-5419-a73a-9bf4b77e96d8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "b4f1f3ff-ab01-5e8c-b90f-87dc9354394e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n def __init__(self, config=None):\n 
MyCustomVectorDB.__init__(self, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "14a246cb-cc7e-5bb1-b931-cf58d8c86b53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/other-database-other-llm-vannadb.ipynb b/notebooks/other-database-other-llm-vannadb.ipynb index 9a8bd1db..6c1f91e6 100644 --- a/notebooks/other-database-other-llm-vannadb.ipynb +++ b/notebooks/other-database-other-llm-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "8b253300-a4e8-5b94-8ef5-958eefde2756", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI 
(RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b907358f-58aa-52bb-88bd-05ad52607b83", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "a6970aff-d479-566f-8fd4-4cd44851ce28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8cb1a477-45fd-5153-b5c4-92c90f869df4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass RemoteContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = RemoteContext_MyLLM()\n"}, {"id": "d2df2e38-f9f0-5483-9181-59cefa62c124", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"id": "8b253300-a4e8-5b94-8ef5-958eefde2756", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "638a9f05-ed43-58ab-aa9b-2f88ddc6a370", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "a6970aff-d479-566f-8fd4-4cd44851ce28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "31af41ed-c218-5c88-abe2-43880a5fe428", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "d2df2e38-f9f0-5483-9181-59cefa62c124", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/notebooks/postgres-mistral-chromadb.ipynb b/notebooks/postgres-mistral-chromadb.ipynb new file mode 100644 index 00000000..6065db63 --- /dev/null +++ b/notebooks/postgres-mistral-chromadb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "0f3048c8-acae-5ca8-b441-2e74e9190bc2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. 
If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a03347e7-7aec-59e2-a48b-a8b757da02f7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "89d75cda-e5b9-5911-91ed-0cf5c06d7f69", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "8f8fcaac-414d-5dc4-9d96-2a42ce33b9db", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai,postgres]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "df67fe44-2513-5b1b-b26c-9a40090cad0d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "3d1b4791-ba66-588e-90d5-81366b367805", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-mistral-other-vectordb.ipynb b/notebooks/postgres-mistral-other-vectordb.ipynb new file mode 100644 index 00000000..b6a4369a --- /dev/null +++ b/notebooks/postgres-mistral-other-vectordb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "1bf7ca05-0a4b-56c4-8958-79b30e5afc2e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "13f1c9f5-c46b-56da-a35a-83299b1735b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "8de7ee64-7ba3-5daf-860f-d4ba11e68b3f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "72ba7676-e2d8-5a7e-ac6e-f7578822efb5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,postgres]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "9f2ee853-f355-5fb6-96b9-081087740800", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "ef45e977-adff-5d4f-a769-28be071afb5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-mistral-vannadb.ipynb b/notebooks/postgres-mistral-vannadb.ipynb new file mode 100644 index 00000000..6bf70ef1 --- /dev/null +++ b/notebooks/postgres-mistral-vannadb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "7eb3f36a-c79b-5c0c-8eb6-4601a6c3c634", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e0d82a7a-03af-5945-94b5-d9b71c024f01", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "d602c88f-320a-57e8-85ee-73685b5c9cd1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "72ba7676-e2d8-5a7e-ac6e-f7578822efb5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,postgres]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "784c1725-e122-5013-af69-136dbf47e712", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "ce84935a-2ca8-5f5b-b3fc-1f0c77380d9a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-openai-azure-chromadb.ipynb b/notebooks/postgres-openai-azure-chromadb.ipynb index a287d044..eaf172ae 100644 --- a/notebooks/postgres-openai-azure-chromadb.ipynb +++ b/notebooks/postgres-openai-azure-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "e7cd5976-e784-52c5-be86-b454ffa806c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "9facb873-2e7b-54ed-8d7b-8054ee2b1709", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "7a79bcc1-c725-5378-bb13-75291ac67f99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "125a30f1-60b6-5ad5-a154-40363adc38d1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "e7cd5976-e784-52c5-be86-b454ffa806c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d69856a3-3ea7-5ca5-8159-040262aa4049", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "7a79bcc1-c725-5378-bb13-75291ac67f99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "7f7b00e8-0cc7-54ff-abf8-4ea3aaee7a0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "125a30f1-60b6-5ad5-a154-40363adc38d1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-openai-azure-other-vectordb.ipynb b/notebooks/postgres-openai-azure-other-vectordb.ipynb index d74c9f80..5d7554f4 100644 --- a/notebooks/postgres-openai-azure-other-vectordb.ipynb +++ b/notebooks/postgres-openai-azure-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "6c5bdd16-d84b-527f-a805-d01f53e7337c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "7c798bf5-b9ff-52c6-9317-467ed5d44b68", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "46f63606-ab8f-5d12-b7a2-ca467c1921e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "e11da77b-6e86-53a2-9c7b-700c2550921e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "6c5bdd16-d84b-527f-a805-d01f53e7337c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "55d62c18-7f8e-53cf-8a60-fb5abd6efee4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "46f63606-ab8f-5d12-b7a2-ca467c1921e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "a4b7ba84-ae94-5470-8520-21d06dbee484", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "e11da77b-6e86-53a2-9c7b-700c2550921e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-openai-azure-vannadb.ipynb b/notebooks/postgres-openai-azure-vannadb.ipynb index f22ac3f1..d4f54d02 100644 --- a/notebooks/postgres-openai-azure-vannadb.ipynb +++ b/notebooks/postgres-openai-azure-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "778e258a-8586-5d24-bde4-0a1a6361f6a9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cff68f11-eb15-5e47-a761-a69d6110e63e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "3f5adb23-3700-54cb-8072-9e953b9c5273", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "d3f4d823-ad6c-575a-9797-666a5dc2b4d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "6aed1fd9-7ffe-503a-bd0d-ee0fb914aeff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "778e258a-8586-5d24-bde4-0a1a6361f6a9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b707bd2c-b6fa-517e-944f-1f316438c252", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "3f5adb23-3700-54cb-8072-9e953b9c5273", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1ab3f192-9d0c-53c3-ab61-46bf91f402c7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "6aed1fd9-7ffe-503a-bd0d-ee0fb914aeff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-openai-standard-chromadb.ipynb b/notebooks/postgres-openai-standard-chromadb.ipynb index 5c9fa300..56d543ef 100644 --- a/notebooks/postgres-openai-standard-chromadb.ipynb +++ b/notebooks/postgres-openai-standard-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "95b3f23b-49e5-5f79-bedc-f3e5dea0f14f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a6516065-f98b-5674-ba9e-232c3fd9a992", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "3cfc3d9c-9ca0-55b4-9a78-7c033b5a5bf0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "381badf7-67a1-5d18-ad08-30ada1767adf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.local import LocalContext_OpenAI\n"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "69b47dc7-f766-59c7-8f50-3390f2044c0e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "95b3f23b-49e5-5f79-bedc-f3e5dea0f14f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a552e618-fefd-5a0d-8005-c47323ee0a3a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "3cfc3d9c-9ca0-55b4-9a78-7c033b5a5bf0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "0ba8dbbb-e450-5e7f-a6a5-35b3800eb1a2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "69b47dc7-f766-59c7-8f50-3390f2044c0e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-openai-standard-other-vectordb.ipynb b/notebooks/postgres-openai-standard-other-vectordb.ipynb index 7d481e69..e8b59f0e 100644 --- a/notebooks/postgres-openai-standard-other-vectordb.ipynb +++ b/notebooks/postgres-openai-standard-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "ad6a7ca6-de11-5e6c-accf-b908b3b5f536", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6f847725-3387-5ed0-ab50-c42e58a31c0d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "bca6a0ac-79c7-59c7-8b54-1cba19c49b91", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "b3cf62dc-7c47-56f5-8dd9-65fc75597155", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "ad6a7ca6-de11-5e6c-accf-b908b3b5f536", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cc22b65c-c118-568d-95b5-320743ba50ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "bca6a0ac-79c7-59c7-8b54-1cba19c49b91", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "3e2913dc-6022-5042-a129-1fc67577b2b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "b3cf62dc-7c47-56f5-8dd9-65fc75597155", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-openai-standard-vannadb.ipynb b/notebooks/postgres-openai-standard-vannadb.ipynb index 7d9f82fe..b1345baa 100644 --- a/notebooks/postgres-openai-standard-vannadb.ipynb +++ b/notebooks/postgres-openai-standard-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "bd885f8c-8938-5a71-826f-6c4000c70508", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6f1aa45b-9c5c-5514-96a4-01cd2a4bc923", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "ee38d28e-9586-5360-a872-95655441a9d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "43b24c20-9cca-55be-8f1e-5cdbff80c916", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n\n"}, {"id": "c6ccc82b-147a-5e2e-937d-74a8dc8c5582", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "bd885f8c-8938-5a71-826f-6c4000c70508", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "23336edb-af5c-5cee-a11a-da2a8355603f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "ee38d28e-9586-5360-a872-95655441a9d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "2a4db58f-b60a-5019-a575-0a3faf4397eb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "c6ccc82b-147a-5e2e-937d-74a8dc8c5582", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-openai-vanna-chromadb.ipynb b/notebooks/postgres-openai-vanna-chromadb.ipynb index 27b265e5..ed049a81 100644 --- a/notebooks/postgres-openai-vanna-chromadb.ipynb +++ b/notebooks/postgres-openai-vanna-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "df07813d-72a1-5452-8dc9-f6894538a24b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d36ef7f7-0b29-5253-bb56-78b79ab74e53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "24cf5a7e-e723-58d1-ad4e-b28ead2724aa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "febd7f76-f6f4-570d-a21f-a522c19a44dd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "df07813d-72a1-5452-8dc9-f6894538a24b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "75fcde7a-5d87-565f-81e2-8270d4b4be99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "24cf5a7e-e723-58d1-ad4e-b28ead2724aa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "febd7f76-f6f4-570d-a21f-a522c19a44dd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-openai-vanna-other-vectordb.ipynb b/notebooks/postgres-openai-vanna-other-vectordb.ipynb index 1357282f..2a2a5219 100644 --- a/notebooks/postgres-openai-vanna-other-vectordb.ipynb +++ b/notebooks/postgres-openai-vanna-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "c6b9a340-0204-5267-b461-47450cdb8a2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "eedd396b-ecb5-55a8-b006-4387078a5045", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "0bd9a7e5-7502-5050-a738-fca444680f71", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "ee11dcde-9152-5e53-860a-bb72016db15b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "c6b9a340-0204-5267-b461-47450cdb8a2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "502499f2-8200-5bdf-bc7d-4bd15b1e7f63", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "0bd9a7e5-7502-5050-a738-fca444680f71", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "ee11dcde-9152-5e53-860a-bb72016db15b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-openai-vanna-vannadb.ipynb b/notebooks/postgres-openai-vanna-vannadb.ipynb index d46d4f4f..7ae4f784 100644 --- a/notebooks/postgres-openai-vanna-vannadb.ipynb +++ b/notebooks/postgres-openai-vanna-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "ff3e46c3-5735-53fe-86ba-dd1ca947e4ba", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "60e9fc1e-6dec-5b50-bf3d-d38b99ea9e52", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "ee38d28e-9586-5360-a872-95655441a9d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "73461d40-259b-59ae-a794-2fbc1806e412", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "ff3e46c3-5735-53fe-86ba-dd1ca947e4ba", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c263c951-b756-57eb-a66a-ef636ff0a696", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "ee38d28e-9586-5360-a872-95655441a9d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "73461d40-259b-59ae-a794-2fbc1806e412", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-other-llm-chromadb.ipynb b/notebooks/postgres-other-llm-chromadb.ipynb index b7056d88..3a3b7117 100644 --- a/notebooks/postgres-other-llm-chromadb.ipynb +++ b/notebooks/postgres-other-llm-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "30e88248-26ea-53ad-93a6-9b3d5da41033", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "bc051b84-36f0-5b70-b526-2523444ce4fd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "9c8da62e-70bb-5a3b-b468-25c7275aa943", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "2436d4ef-a066-5584-b97d-9696c8bbb9ec", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "30e88248-26ea-53ad-93a6-9b3d5da41033", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0dc3de1e-d806-5f7c-86aa-f0a764bbb64c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "9c8da62e-70bb-5a3b-b468-25c7275aa943", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "162d14f3-c44c-5686-94c5-64298c05456e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "2436d4ef-a066-5584-b97d-9696c8bbb9ec", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-other-llm-other-vectordb.ipynb b/notebooks/postgres-other-llm-other-vectordb.ipynb index 52e2ca4d..df5f60da 100644 --- a/notebooks/postgres-other-llm-other-vectordb.ipynb +++ b/notebooks/postgres-other-llm-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "3a23b994-dde6-5290-a4ae-5c0fbc8143d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e44713bb-5ba1-5a68-9d1b-9b8ee0b66b75", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "12091978-155e-5893-843f-42a69071be9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "8af744f9-5cb5-5a01-b58b-d736caac0164", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "3a23b994-dde6-5290-a4ae-5c0fbc8143d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1a323199-007a-5773-aab8-b6510f11824b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "12091978-155e-5893-843f-42a69071be9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "b4f1f3ff-ab01-5e8c-b90f-87dc9354394e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n def __init__(self, 
config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "8af744f9-5cb5-5a01-b58b-d736caac0164", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/postgres-other-llm-vannadb.ipynb b/notebooks/postgres-other-llm-vannadb.ipynb index e9fcdde7..c2617a4b 100644 --- a/notebooks/postgres-other-llm-vannadb.ipynb +++ b/notebooks/postgres-other-llm-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "cd757483-4d52-5a68-94b5-0244087a9cdb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "19d8bfa9-cb21-5f99-8fd1-e23bdc5456b1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "14feae0c-c763-54d1-bf1e-a2b013e18ddd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8cb1a477-45fd-5153-b5c4-92c90f869df4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass RemoteContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = RemoteContext_MyLLM()\n"}, {"id": "7b97e4ca-e994-5cf9-a09a-36a8e12f6abd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "cd757483-4d52-5a68-94b5-0244087a9cdb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2ba202ba-7325-523a-9717-cb7730ea4253", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "14feae0c-c763-54d1-bf1e-a2b013e18ddd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "31af41ed-c218-5c88-abe2-43880a5fe428", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "7b97e4ca-e994-5cf9-a09a-36a8e12f6abd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-mistral-chromadb.ipynb b/notebooks/snowflake-mistral-chromadb.ipynb new file mode 100644 index 00000000..880cfd32 --- /dev/null +++ b/notebooks/snowflake-mistral-chromadb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "581af781-64b0-5419-b34c-f423e998b939", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "028bcaab-c1fe-57d7-ad3d-eae62574637b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "02adbe75-1c74-5303-92ae-811004580c1c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "678cedb3-98fb-53c0-8cfd-c8663ea21113", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai,snowflake]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "df67fe44-2513-5b1b-b26c-9a40090cad0d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "af027177-7e5c-5b9a-ab52-93e9c0c3e2ff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-mistral-other-vectordb.ipynb b/notebooks/snowflake-mistral-other-vectordb.ipynb new file mode 100644 index 00000000..28190577 --- /dev/null +++ b/notebooks/snowflake-mistral-other-vectordb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "bb27d995-aa3f-5004-9976-87e64a33a9d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "59581b59-5760-589b-b8b8-423389281910", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "183935b0-040b-5a08-ab3c-3d950622c2f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "037d295c-00dd-5d10-9d2f-0e2b81de13d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,snowflake]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "9f2ee853-f355-5fb6-96b9-081087740800", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "9f1fa815-47fd-5a51-a79c-27065a397031", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-mistral-vannadb.ipynb b/notebooks/snowflake-mistral-vannadb.ipynb new file mode 100644 index 00000000..39c72a7a --- /dev/null +++ b/notebooks/snowflake-mistral-vannadb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "331570b7-be75-5d5f-8cff-7e4570addec8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "30ca42f3-0e6c-5e60-a359-503a87213bf2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "18dcf38e-e5cf-5a29-960b-b6ded0cc771b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "037d295c-00dd-5d10-9d2f-0e2b81de13d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai,snowflake]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "784c1725-e122-5013-af69-136dbf47e712", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "1e5dad10-0000-5918-b510-9aa80c876212", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-openai-azure-chromadb.ipynb b/notebooks/snowflake-openai-azure-chromadb.ipynb index a5dde82a..0b15811f 100644 --- a/notebooks/snowflake-openai-azure-chromadb.ipynb +++ b/notebooks/snowflake-openai-azure-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "6287c685-ff0c-5a75-a58f-fd9d47b14fc3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "74d715d6-7ece-5bdd-aea9-8384950b3800", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "0d342fd1-a5ff-5d7f-86c4-cad506f84ae4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "cd3454f0-9566-5b7b-8e28-ca637b1c3964", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "6287c685-ff0c-5a75-a58f-fd9d47b14fc3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "7c603f36-8039-502a-983d-bf98a618b27a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "0d342fd1-a5ff-5d7f-86c4-cad506f84ae4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "7f7b00e8-0cc7-54ff-abf8-4ea3aaee7a0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "cd3454f0-9566-5b7b-8e28-ca637b1c3964", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-openai-azure-other-vectordb.ipynb b/notebooks/snowflake-openai-azure-other-vectordb.ipynb index 082230e0..3a270fe7 100644 --- a/notebooks/snowflake-openai-azure-other-vectordb.ipynb +++ b/notebooks/snowflake-openai-azure-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "6aac9f07-876b-5289-9f4e-7524132617ad", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "eef234eb-aee8-541f-b32f-f8bde6f23759", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "44bc4304-7303-5efc-8b46-3c795d4758fa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "a90a47b5-ed37-5e9a-abfb-044d3b6938e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "6aac9f07-876b-5289-9f4e-7524132617ad", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1124d018-1c19-5e7e-9bc5-0ef3f2d34a44", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "44bc4304-7303-5efc-8b46-3c795d4758fa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "a4b7ba84-ae94-5470-8520-21d06dbee484", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "a90a47b5-ed37-5e9a-abfb-044d3b6938e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-openai-azure-vannadb.ipynb b/notebooks/snowflake-openai-azure-vannadb.ipynb index 32f0ed22..b6122081 100644 --- a/notebooks/snowflake-openai-azure-vannadb.ipynb +++ b/notebooks/snowflake-openai-azure-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "813e792b-6025-521d-acaf-cc20cbd83c99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "fb1e5992-1b48-5122-bc78-0d07ebc6a5c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "f87cecbf-d98e-5a64-a354-715ab3da3c36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "d3f4d823-ad6c-575a-9797-666a5dc2b4d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "ab74636a-a65f-5a2d-bca2-b59efd9b8e18", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "813e792b-6025-521d-acaf-cc20cbd83c99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2e6e05ce-2c8b-524e-9478-07968ff24fe5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "f87cecbf-d98e-5a64-a354-715ab3da3c36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1ab3f192-9d0c-53c3-ab61-46bf91f402c7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "ab74636a-a65f-5a2d-bca2-b59efd9b8e18", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-openai-standard-chromadb.ipynb b/notebooks/snowflake-openai-standard-chromadb.ipynb index e1f478d7..63215305 100644 --- a/notebooks/snowflake-openai-standard-chromadb.ipynb +++ b/notebooks/snowflake-openai-standard-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "75481477-4412-582b-aba7-52183e26f37c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0735cc09-44bd-5162-bf7a-1e92ec717546", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "a8ac9a78-92a7-56dd-8352-c00f6df060ab", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "381badf7-67a1-5d18-ad08-30ada1767adf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.local import LocalContext_OpenAI\n"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "6272d9bb-f7dd-5b43-8db4-64817b295df0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "75481477-4412-582b-aba7-52183e26f37c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "339703f1-d6cd-5c0f-8ac1-c221f0798512", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "a8ac9a78-92a7-56dd-8352-c00f6df060ab", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "0ba8dbbb-e450-5e7f-a6a5-35b3800eb1a2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "6272d9bb-f7dd-5b43-8db4-64817b295df0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-openai-standard-other-vectordb.ipynb b/notebooks/snowflake-openai-standard-other-vectordb.ipynb index d73fd65f..184606d1 100644 --- a/notebooks/snowflake-openai-standard-other-vectordb.ipynb +++ b/notebooks/snowflake-openai-standard-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "a53aec8f-5a05-50f3-9209-7bbf1dca9e6f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8428c985-6bf9-5c87-9d34-56ae18e6f776", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "d80b2999-7fe1-5d05-9db4-bc257c081a7c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "8bbc71c3-929d-5119-9d69-63fa678c23a0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "a53aec8f-5a05-50f3-9209-7bbf1dca9e6f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "4486c1b3-6c3e-530c-8386-50ca4b3f99e8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "d80b2999-7fe1-5d05-9db4-bc257c081a7c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "3e2913dc-6022-5042-a129-1fc67577b2b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "8bbc71c3-929d-5119-9d69-63fa678c23a0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-openai-standard-vannadb.ipynb b/notebooks/snowflake-openai-standard-vannadb.ipynb index f153ac3e..7fca09fd 100644 --- a/notebooks/snowflake-openai-standard-vannadb.ipynb +++ b/notebooks/snowflake-openai-standard-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "f13605be-e49c-5be3-87bc-2a90efb57306", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dcb1bb48-bc00-5b3f-92c3-0fe0d671c153", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "8fb7581d-c7bf-55d2-9df9-94635c47f261", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "43b24c20-9cca-55be-8f1e-5cdbff80c916", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n\n"}, {"id": "2ebd09c1-87dd-501f-a64a-f1404386b4f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "f13605be-e49c-5be3-87bc-2a90efb57306", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e2b2b876-72a0-5854-9177-5ad9bcc29ef7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "8fb7581d-c7bf-55d2-9df9-94635c47f261", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "2a4db58f-b60a-5019-a575-0a3faf4397eb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2ebd09c1-87dd-501f-a64a-f1404386b4f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-openai-vanna-chromadb.ipynb b/notebooks/snowflake-openai-vanna-chromadb.ipynb index faa822ad..2d3fdbd4 100644 --- a/notebooks/snowflake-openai-vanna-chromadb.ipynb +++ b/notebooks/snowflake-openai-vanna-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "e493593c-e8c4-5cdd-bdb8-e5d8bb39b0c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f8394c9a-5605-50ad-8250-3ab96af74425", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "ecdf606b-37b1-57e1-a8c1-39ec21da67f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "fdfb6c1f-9b01-5034-9aa5-75cd2e199d28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "e493593c-e8c4-5cdd-bdb8-e5d8bb39b0c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "21979433-946d-5051-90ef-4b087189e318", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "ecdf606b-37b1-57e1-a8c1-39ec21da67f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "fdfb6c1f-9b01-5034-9aa5-75cd2e199d28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-openai-vanna-other-vectordb.ipynb b/notebooks/snowflake-openai-vanna-other-vectordb.ipynb index 0b04bb2c..59e42db5 100644 --- a/notebooks/snowflake-openai-vanna-other-vectordb.ipynb +++ b/notebooks/snowflake-openai-vanna-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "42212054-35d6-56f3-be8e-47af265d9df9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "63dd4380-37f0-5d63-b20e-7373b1487925", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "bb99a675-f2f6-5840-9f13-13982b887387", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "deada75c-50bd-5cd8-95ff-e187e794d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "42212054-35d6-56f3-be8e-47af265d9df9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c38dad5b-51c7-5b49-9e21-6dbace6b923c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "bb99a675-f2f6-5840-9f13-13982b887387", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "deada75c-50bd-5cd8-95ff-e187e794d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-openai-vanna-vannadb.ipynb b/notebooks/snowflake-openai-vanna-vannadb.ipynb index 479094b9..1cc657fc 100644 --- a/notebooks/snowflake-openai-vanna-vannadb.ipynb +++ b/notebooks/snowflake-openai-vanna-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "94653bec-534d-5627-9309-0f5d8df292eb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1979a126-1340-556c-8ec4-3244da479eda", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "8fb7581d-c7bf-55d2-9df9-94635c47f261", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "2ff941e7-752b-5c1c-bdf2-d3ab0b17cb48", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "94653bec-534d-5627-9309-0f5d8df292eb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e3e271b5-29aa-5174-b69e-2260293a6c0a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "8fb7581d-c7bf-55d2-9df9-94635c47f261", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "2ff941e7-752b-5c1c-bdf2-d3ab0b17cb48", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-other-llm-chromadb.ipynb b/notebooks/snowflake-other-llm-chromadb.ipynb index 361a0400..053d23ce 100644 --- a/notebooks/snowflake-other-llm-chromadb.ipynb +++ b/notebooks/snowflake-other-llm-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "f755ebca-fde7-5eb1-802d-2c830e0b6282", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "945069a0-5611-5b19-abc7-025b5c4ab63b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "765e0228-2970-5cd0-b71c-7880112c121b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "fbd8aa4c-3dcb-52af-90a7-f358238c13b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "f755ebca-fde7-5eb1-802d-2c830e0b6282", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0ceeb6ff-3607-5b14-9238-000548eb8fcd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "765e0228-2970-5cd0-b71c-7880112c121b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "162d14f3-c44c-5686-94c5-64298c05456e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "fbd8aa4c-3dcb-52af-90a7-f358238c13b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-other-llm-other-vectordb.ipynb b/notebooks/snowflake-other-llm-other-vectordb.ipynb index cdcb05b1..0051bf42 100644 --- a/notebooks/snowflake-other-llm-other-vectordb.ipynb +++ b/notebooks/snowflake-other-llm-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "cb9146eb-a0a0-5e0f-82b7-59e92f069498", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dcfb5b01-ae11-5d69-a537-5994e61953c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "1577379b-aaa2-5b38-b4de-eeb16e98bcaf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "c1fd8a1f-0db6-5067-af77-776edcb4ed06", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "cb9146eb-a0a0-5e0f-82b7-59e92f069498", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dcd9d3c5-e561-5d70-a775-f0f40a772553", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "1577379b-aaa2-5b38-b4de-eeb16e98bcaf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "b4f1f3ff-ab01-5e8c-b90f-87dc9354394e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n def __init__(self, 
config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "c1fd8a1f-0db6-5067-af77-776edcb4ed06", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/snowflake-other-llm-vannadb.ipynb b/notebooks/snowflake-other-llm-vannadb.ipynb index badedf52..2b17c883 100644 --- a/notebooks/snowflake-other-llm-vannadb.ipynb +++ b/notebooks/snowflake-other-llm-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "5dcc604a-caa9-5f6d-ab0e-3f417516c076", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "15b23da4-cc7e-595d-a5f7-822a32b0f4e8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "76440ab2-1a0b-5d31-a223-58bc3bd7a8fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8cb1a477-45fd-5153-b5c4-92c90f869df4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass RemoteContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = RemoteContext_MyLLM()\n"}, {"id": "4442a4e5-f9cb-5f37-bf17-c51c08b5a35b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "5dcc604a-caa9-5f6d-ab0e-3f417516c076", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "aadb008e-18eb-5637-b13d-7a2626e8573d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "76440ab2-1a0b-5d31-a223-58bc3bd7a8fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "31af41ed-c218-5c88-abe2-43880a5fe428", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "4442a4e5-f9cb-5f37-bf17-c51c08b5a35b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-mistral-chromadb.ipynb b/notebooks/sqlite-mistral-chromadb.ipynb new file mode 100644 index 00000000..ba4f95d8 --- /dev/null +++ b/notebooks/sqlite-mistral-chromadb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "b901f917-7d1d-5429-b7f3-628b1f239f8d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "540b0186-561a-5cfa-817b-86688e213f3e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "f67b8274-c4b6-555a-a12c-a4fb0bad0e4d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30aef63a-a75b-52bd-8f9c-7fc096e0aba3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,mistralai]'"}, {"id": "3a68caf5-fa2e-5ee9-9bbb-7b85ea07a5ea", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "df67fe44-2513-5b1b-b26c-9a40090cad0d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, Mistral):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "a46a1092-3b46-50a0-bf0b-15cabe6acf0e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-mistral-other-vectordb.ipynb b/notebooks/sqlite-mistral-other-vectordb.ipynb new file mode 100644 index 00000000..b2a54dd5 --- /dev/null +++ b/notebooks/sqlite-mistral-other-vectordb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "0aeab80e-f7e9-5ae8-967e-72d92129951d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2ece6ad9-270f-59ff-a4aa-db2654c5eafd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "5f5028e3-8438-5909-a9c0-d8f2db011844", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "d54a05e2-de07-56c0-b57f-0bf2d42e559c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "9f2ee853-f355-5fb6-96b9-081087740800", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, Mistral):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "0c312e61-5705-58ef-8d4c-2e9d0e29c6c1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-mistral-vannadb.ipynb b/notebooks/sqlite-mistral-vannadb.ipynb new file mode 100644 index 00000000..f714333c --- /dev/null +++ b/notebooks/sqlite-mistral-vannadb.ipynb @@ -0,0 +1 @@ +{"cells": [{"id": "5c6445ea-8f7a-5824-b2df-adc2e1e7f405", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Mistral via Mistral API, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cd81f971-231c-5588-ade6-ba27ec4cbd26", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "18a145ff-b10a-5c6f-a734-82c0fc68dce9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "22d0fc58-9374-5bda-9414-d80b35fecb42", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[mistralai]'"}, {"id": "88b7ebec-d4f9-53aa-8f06-7c27055d16b0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.mistral.mistral import Mistral\n"}, {"id": "784c1725-e122-5013-af69-136dbf47e712", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, Mistral):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tMistral.__init__(self, config={'api_key': MISTRAL_API_KEY, 'model': 'mistral-tiny'})\n\nvn = MyVanna()\n"}, {"id": "3d804717-e109-5360-9f2d-98de92bf94c5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-openai-azure-chromadb.ipynb b/notebooks/sqlite-openai-azure-chromadb.ipynb index 15dd4932..c27d5b37 100644 --- a/notebooks/sqlite-openai-azure-chromadb.ipynb +++ b/notebooks/sqlite-openai-azure-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "262d99dc-b1f5-52a2-b3dd-c959896d40d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "028d9db5-f0e6-5b2e-a8ff-59aa1c44d06a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "af9b8e47-251a-57c4-bbf8-b44aff8fd7d4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "860d03fb-aa03-53d2-b703-3ca6f77232b0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "262d99dc-b1f5-52a2-b3dd-c959896d40d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b988d8ae-969c-5d2d-9548-d79ab9566238", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "af9b8e47-251a-57c4-bbf8-b44aff8fd7d4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "7f7b00e8-0cc7-54ff-abf8-4ea3aaee7a0a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "860d03fb-aa03-53d2-b703-3ca6f77232b0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-openai-azure-other-vectordb.ipynb b/notebooks/sqlite-openai-azure-other-vectordb.ipynb index 5b8829a4..e8ee4aea 100644 --- a/notebooks/sqlite-openai-azure-other-vectordb.ipynb +++ b/notebooks/sqlite-openai-azure-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "48659c68-dbb3-5f89-a750-75a3bd1d4872", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1fa03e27-468c-521b-8b1f-a2f02afbc8f7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "80ef7877-e065-5927-8d3d-3c7a24883e17", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "7ac5fa06-681a-5c2f-abdc-8541cfe6f770", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "48659c68-dbb3-5f89-a750-75a3bd1d4872", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ab32bcb2-1993-56d5-886f-8262fde1de35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "80ef7877-e065-5927-8d3d-3c7a24883e17", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "a4b7ba84-ae94-5470-8520-21d06dbee484", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': azure,\n\t'api_base': https://...,\n\t'api_version': 2023-05-15,\n\t'engine': YOUR_ENGINE_HERE,\n\t'api_key': sk-...,\n})\n"}, {"id": "7ac5fa06-681a-5c2f-abdc-8541cfe6f770", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-openai-azure-vannadb.ipynb b/notebooks/sqlite-openai-azure-vannadb.ipynb index 8e062179..3649f17d 100644 --- a/notebooks/sqlite-openai-azure-vannadb.ipynb +++ b/notebooks/sqlite-openai-azure-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "79b5f2a1-f4ac-5c48-9451-4d4c5e9bbb4d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1eee9e17-3f72-51f9-9522-da40b200cd94", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "c919366c-b0da-5326-9b4f-a5e5ee71b7be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "d3f4d823-ad6c-575a-9797-666a5dc2b4d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "e3bfcfae-1df1-5e15-a4f1-e49fa8aed61b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "79b5f2a1-f4ac-5c48-9451-4d4c5e9bbb4d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "09f17b31-ae67-5c56-a75e-86914502d751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "c919366c-b0da-5326-9b4f-a5e5ee71b7be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "1ab3f192-9d0c-53c3-ab61-46bf91f402c7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={\n\t'api_type': 'azure',\n\t'api_base': 'https://...',\n\t'api_version': '2023-05-15',\n\t'engine': 'YOUR_ENGINE_HERE',\n\t'api_key': 'sk-...',\n})\n"}, {"id": "e3bfcfae-1df1-5e15-a4f1-e49fa8aed61b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-openai-standard-chromadb.ipynb b/notebooks/sqlite-openai-standard-chromadb.ipynb index d53bb6b4..65214c77 100644 --- a/notebooks/sqlite-openai-standard-chromadb.ipynb +++ b/notebooks/sqlite-openai-standard-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "af8cd30a-415d-5ac9-9511-853d099fca5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8c7fce83-a9d6-5846-9f59-6217ea40d3e9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "ba1444db-44d3-5ca7-8461-308b312e053f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "381badf7-67a1-5d18-ad08-30ada1767adf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.local import LocalContext_OpenAI\n"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "70963293-29e6-57c4-95f1-b5045d63a75b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "af8cd30a-415d-5ac9-9511-853d099fca5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "110b678c-bd21-5651-9f10-6049f62e5edc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "ba1444db-44d3-5ca7-8461-308b312e053f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "0ba8dbbb-e450-5e7f-a6a5-35b3800eb1a2", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "70963293-29e6-57c4-95f1-b5045d63a75b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-openai-standard-other-vectordb.ipynb b/notebooks/sqlite-openai-standard-other-vectordb.ipynb index 39670bda..15a83d09 100644 --- a/notebooks/sqlite-openai-standard-other-vectordb.ipynb +++ b/notebooks/sqlite-openai-standard-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "8fedd282-2590-569a-93ce-ab6c6a4fa48a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "97ad1de6-1fb0-5c04-8378-6c21db0447da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "3da5bae9-ef50-5164-9b90-2eeb8c07e96f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "aac12e94-b008-55dc-af6f-3199c9b8fb36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "8fedd282-2590-569a-93ce-ab6c6a4fa48a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2218e0a1-fa4e-56e9-9926-933c4a92043b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "3da5bae9-ef50-5164-9b90-2eeb8c07e96f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "3e2913dc-6022-5042-a129-1fc67577b2b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB, OpenAI_Chat):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "aac12e94-b008-55dc-af6f-3199c9b8fb36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-openai-standard-vannadb.ipynb b/notebooks/sqlite-openai-standard-vannadb.ipynb index 2583bc2d..46d9ae9a 100644 --- a/notebooks/sqlite-openai-standard-vannadb.ipynb +++ b/notebooks/sqlite-openai-standard-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "078f7efe-c23e-5d4c-98ee-a1d3e014992f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "32bc8aa3-e5a6-575b-bbb4-aaed33dd641f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "394e0f93-821f-5ba7-9cf5-4574fbb026bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "43b24c20-9cca-55be-8f1e-5cdbff80c916", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n\n"}, {"id": "2de3b0d3-71e4-5335-9e65-29c27aff8e1d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "078f7efe-c23e-5d4c-98ee-a1d3e014992f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "3b9fc8e7-75e3-5625-b12b-40759de02591", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "394e0f93-821f-5ba7-9cf5-4574fbb026bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "2a4db58f-b60a-5019-a575-0a3faf4397eb", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tOpenAI_Chat.__init__(self, config=config)\n\nvn = MyVanna(config={'api_key': 'sk-...', 'model': 'gpt-4-...'})\n"}, {"id": "2de3b0d3-71e4-5335-9e65-29c27aff8e1d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-openai-vanna-chromadb.ipynb b/notebooks/sqlite-openai-vanna-chromadb.ipynb index eb1bead7..41237559 100644 --- a/notebooks/sqlite-openai-vanna-chromadb.ipynb +++ b/notebooks/sqlite-openai-vanna-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "9d6519a8-e544-5523-82ce-97784be01264", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "5d6f5c01-f5dc-5175-8812-f4cb8e18fb3a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "63b0e89e-0abb-50e3-8485-70155d8aa30b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "13991486-a5af-5687-9351-5b4159aeb502", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "9d6519a8-e544-5523-82ce-97784be01264", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "96bb2afc-e019-52bb-b5ae-575a74d42bd3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "63b0e89e-0abb-50e3-8485-70155d8aa30b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "3225927e-ae19-5159-a112-8dac5a3cda22", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\n\nclass MyVanna(ChromaDB_VectorStore):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "13991486-a5af-5687-9351-5b4159aeb502", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-openai-vanna-other-vectordb.ipynb b/notebooks/sqlite-openai-vanna-other-vectordb.ipynb index 934d05fc..d0746d79 100644 --- a/notebooks/sqlite-openai-vanna-other-vectordb.ipynb +++ b/notebooks/sqlite-openai-vanna-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "506bdbf0-9097-5ea8-a3ac-fbbee8ce189c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f60d2282-6489-597d-9427-ba74a7d299fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "da681ba4-1d8d-5ff4-be35-5803a677a21c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "f5348d7a-0fae-5651-880e-1b365a3a1257", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "506bdbf0-9097-5ea8-a3ac-fbbee8ce189c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d0d5c54c-adbb-572b-a2e5-bd708e7d6392", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "da681ba4-1d8d-5ff4-be35-5803a677a21c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "6d63ef56-39f0-5597-bc1b-28acd6031e25", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\n\nclass MyVanna(MyCustomVectorDB):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "f5348d7a-0fae-5651-880e-1b365a3a1257", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-openai-vanna-vannadb.ipynb b/notebooks/sqlite-openai-vanna-vannadb.ipynb index 097b9623..9df42a31 100644 --- a/notebooks/sqlite-openai-vanna-vannadb.ipynb +++ b/notebooks/sqlite-openai-vanna-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "3b9016b5-2bfd-5bd2-98e7-8ba4a9dec1be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "647e7afa-8fc3-5046-a40a-9082fe485696", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "394e0f93-821f-5ba7-9cf5-4574fbb026bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "1f3ff57f-259c-548e-bf2c-92170dfcf1b5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "3b9016b5-2bfd-5bd2-98e7-8ba4a9dec1be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e5b1e80e-8db4-5085-8d4f-6e34ca21d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "394e0f93-821f-5ba7-9cf5-4574fbb026bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "1e2a3991-20f4-56ed-9d26-59bd14b27cc5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "import vanna as vn"}, {"id": "85dfc95b-2e52-5383-9e25-7e5284f5bba5", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "api_key = vn.get_api_key('my-email@example.com')\nvn.set_api_key(api_key)\nvn.set_model('my-model')"}, {"id": "1f3ff57f-259c-548e-bf2c-92170dfcf1b5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-other-llm-chromadb.ipynb b/notebooks/sqlite-other-llm-chromadb.ipynb index e21ca4f3..af243659 100644 --- a/notebooks/sqlite-other-llm-chromadb.ipynb +++ b/notebooks/sqlite-other-llm-chromadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "d3c0d01c-97e4-50e9-8e05-32bbed37f5d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8abf4215-90e0-58a1-868a-71e3e76fb0d5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "8b5cba50-be4a-5111-8bbf-c7235bdd38c2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "b2854838-2902-59d3-b55b-bf59f3a4d888", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "d3c0d01c-97e4-50e9-8e05-32bbed37f5d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a3b250e1-ea89-548a-ac9d-73350818add5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "8b5cba50-be4a-5111-8bbf-c7235bdd38c2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "162d14f3-c44c-5686-94c5-64298c05456e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "b2854838-2902-59d3-b55b-bf59f3a4d888", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-other-llm-other-vectordb.ipynb b/notebooks/sqlite-other-llm-other-vectordb.ipynb index c2c9fc2e..6b843857 100644 --- a/notebooks/sqlite-other-llm-other-vectordb.ipynb +++ b/notebooks/sqlite-other-llm-other-vectordb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "e7a07123-59ae-5e9d-9fc4-8606541246e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2150480b-b185-5dec-8ce6-febe910b1715", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "e34c0da3-6b6a-5a04-aa97-d3dbcef9aec7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "ef8ef020-2101-56df-8aa3-a09caae07971", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "e7a07123-59ae-5e9d-9fc4-8606541246e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f8bf2a61-7dfe-598e-8f6b-d29dc7b440ea", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "e34c0da3-6b6a-5a04-aa97-d3dbcef9aec7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "b4f1f3ff-ab01-5e8c-b90f-87dc9354394e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(MyCustomVectorDB, MyCustomLLM):\n def __init__(self, config=None):\n 
MyCustomVectorDB.__init__(self, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "ef8ef020-2101-56df-8aa3-a09caae07971", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/notebooks/sqlite-other-llm-vannadb.ipynb b/notebooks/sqlite-other-llm-vannadb.ipynb index 4fb77d4b..48e08af5 100644 --- a/notebooks/sqlite-other-llm-vannadb.ipynb +++ b/notebooks/sqlite-other-llm-vannadb.ipynb @@ -1 +1 @@ -{"cells": [{"id": "22fc9194-d307-554e-84c7-5c61665bd738", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b1c0a0b1-77c4-55b4-8715-033185b75886", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "7090c61a-c77d-565f-92d5-3faf38d8e9bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8cb1a477-45fd-5153-b5c4-92c90f869df4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass RemoteContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = RemoteContext_MyLLM()\n"}, {"id": "b141daf0-a79d-59a4-811c-1626ba88d07a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file +{"cells": [{"id": "22fc9194-d307-554e-84c7-5c61665bd738", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ba3c203c-27c0-589c-82e4-c1411f3af28d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which LLM do you want to use?

\n\n "}, {"id": "7090c61a-c77d-565f-92d5-3faf38d8e9bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Where do you want to store the 'training' data?

\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "31af41ed-c218-5c88-abe2-43880a5fe428", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n \n\nclass MyVanna(VannaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=MY_VANNA_MODEL, vanna_api_key=MY_VANNA_API_KEY, config=config)\n\t\tMyCustomLLM.__init__(self, config=config)\n\nvn = MyVanna()\n"}, {"id": "b141daf0-a79d-59a4-811c-1626ba88d07a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n

Which database do you want to query?

\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5} \ 
No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 22d8c6e5..ca6454e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi" [project] name = "vanna" -version = "0.0.29" +version = "0.0.30" authors = [ { name="Zain Hoda", email="zain@vanna.ai" }, ] @@ -33,3 +33,5 @@ all = ["psycopg2", "db-dtypes", "google-cloud-bigquery", "snowflake-connector-py test = ["tox"] chromadb = ["chromadb"] openai = ["openai"] +mistralai = ["mistralai"] +gemini = ["google-generativeai"] diff --git a/src/vanna/base/base.py b/src/vanna/base/base.py index af052798..d4fd6ccc 100644 --- a/src/vanna/base/base.py +++ b/src/vanna/base/base.py @@ -823,9 +823,28 @@ def get_plotly_figure( plotly.graph_objs.Figure: The Plotly figure. """ ldict = {"df": df, "px": px, "go": go} - exec(plotly_code, globals(), ldict) + try: + exec(plotly_code, globals(), ldict) - fig = ldict.get("fig", None) + fig = ldict.get("fig", None) + except Exception as e: + # Inspect data types + numeric_cols = df.select_dtypes(include=['number']).columns.tolist() + categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist() + + # Decision-making for plot type + if len(numeric_cols) >= 2: + # Use the first two numeric columns for a scatter plot + fig = px.scatter(df, x=numeric_cols[0], y=numeric_cols[1]) + elif len(numeric_cols) == 1 and len(categorical_cols) >= 1: + # Use a bar plot if there's one numeric and one categorical column + fig = px.bar(df, x=categorical_cols[0], y=numeric_cols[0]) + elif len(categorical_cols) >= 1 and df[categorical_cols[0]].nunique() < 10: + # Use a pie chart for categorical data with fewer unique values + fig = px.pie(df, names=categorical_cols[0]) + else: + # Default to a simple line plot if above conditions are not met + fig = px.line(df) if fig is None: return None diff --git a/src/vanna/mistral/__init__.py b/src/vanna/mistral/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
import re

from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage

from ..base import VannaBase


class Mistral(VannaBase):
    """VannaBase LLM implementation backed by the Mistral chat-completion API."""

    def __init__(self, config=None):
        """Initialize the Mistral client.

        Args:
            config: Required dict containing 'api_key' (Mistral API key) and
                'model' (e.g. 'mistral-tiny').

        Raises:
            ValueError: If config is None or a required key is missing.
        """
        if config is None:
            raise ValueError("For Mistral, config must be provided with an api_key and model")

        if 'api_key' not in config:
            raise ValueError("config must contain a Mistral api_key")

        if 'model' not in config:
            raise ValueError("config must contain a Mistral model")

        self.client = MistralClient(api_key=config['api_key'])
        self.model = config['model']

    def _extract_python_code(self, markdown_string: str) -> str:
        """Return the first fenced code block in *markdown_string*.

        Prefers ```python fences, falls back to anonymous ``` fences, and
        returns the input unchanged when no fence is found at all.
        """
        # First alternative matches ```python fences; second matches bare ``` fences.
        pattern = r"```[\w\s]*python\n([\s\S]*?)```|```([\s\S]*?)```"

        matches = re.findall(pattern, markdown_string, re.IGNORECASE)

        python_code = []
        for match in matches:
            # Exactly one of the two capture groups is populated per match.
            python = match[0] if match[0] else match[1]
            python_code.append(python.strip())

        if len(python_code) == 0:
            return markdown_string

        return python_code[0]

    def _sanitize_plotly_code(self, raw_plotly_code: str) -> str:
        """Strip fig.show() so the caller receives the figure instead of a popup."""
        return raw_plotly_code.replace("fig.show()", "")

    def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:
        """Ask the model for Plotly code charting the DataFrame described by *df_metadata*.

        Returns sanitized Python code (code fences and fig.show() removed).
        """
        if question is not None:
            system_msg = f"The following is a pandas DataFrame that contains the results of the query that answers the question the user asked: '{question}'"
        else:
            system_msg = "The following is a pandas DataFrame "

        if sql is not None:
            system_msg += f"\n\nThe DataFrame was produced using this query: {sql}\n\n"

        system_msg += f"The following is information about the resulting pandas DataFrame 'df': \n{df_metadata}"

        message_log = [
            self.system_message(system_msg),
            self.user_message(
                "Can you generate the Python plotly code to chart the results of the dataframe? Assume the data is in a pandas dataframe called 'df'. If there is only one value in the dataframe, use an Indicator. Respond with only Python code. Do not answer with any explanations -- just the code."
            ),
        ]

        # BUG FIX: the original passed kwargs=kwargs, which submit_prompt received
        # as a single keyword argument literally named "kwargs". Forward them
        # properly, matching generate_question below.
        plotly_code = self.submit_prompt(message_log, **kwargs)

        return self._sanitize_plotly_code(self._extract_python_code(plotly_code))

    def generate_question(self, sql: str, **kwargs) -> str:
        """Ask the model to guess the business question that *sql* answers."""
        response = self.submit_prompt(
            [
                self.system_message(
                    "The user will give you SQL and you will try to guess what the business question this query is answering. Return just the question without any additional explanation. Do not reference the table name in the question."
                ),
                self.user_message(sql),
            ],
            **kwargs,
        )

        return response

    def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):
        """Build the chat message log used to request follow-up questions."""
        initial_prompt = f"The user initially asked the question: '{question}': \n\n"

        initial_prompt = Mistral.add_ddl_to_prompt(initial_prompt, ddl_list, max_tokens=14000)

        initial_prompt = Mistral.add_documentation_to_prompt(initial_prompt, doc_list, max_tokens=14000)

        initial_prompt = Mistral.add_sql_to_prompt(initial_prompt, question_sql_list, max_tokens=14000)

        message_log = [Mistral.system_message(initial_prompt)]
        message_log.append(Mistral.user_message("Generate a list of followup questions that the user might ask about this data. Respond with a list of questions, one per line. Do not answer with any explanations -- just the questions."))

        return message_log

    @staticmethod
    def system_message(message: str) -> ChatMessage:
        """Wrap *message* as a Mistral system ChatMessage."""
        return ChatMessage(role="system", content=message)

    @staticmethod
    def user_message(message: str) -> ChatMessage:
        """Wrap *message* as a Mistral user ChatMessage."""
        return ChatMessage(role="user", content=message)

    @staticmethod
    def assistant_message(message: str) -> ChatMessage:
        """Wrap *message* as a Mistral assistant ChatMessage."""
        return ChatMessage(role="assistant", content=message)

    @staticmethod
    def str_to_approx_token_count(string: str) -> int:
        # Rough heuristic: ~4 characters per token. Integer division keeps the
        # declared int return type (the original returned a float).
        return len(string) // 4

    @staticmethod
    def add_ddl_to_prompt(initial_prompt: str, ddl_list: list[str], max_tokens: int = 14000) -> str:
        """Append DDL statements to the prompt until the approximate token budget is hit."""
        if len(ddl_list) > 0:
            initial_prompt += "\nYou may use the following DDL statements as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"

            for ddl in ddl_list:
                if Mistral.str_to_approx_token_count(initial_prompt) + Mistral.str_to_approx_token_count(ddl) < max_tokens:
                    initial_prompt += f"{ddl}\n\n"

        return initial_prompt

    @staticmethod
    def add_documentation_to_prompt(initial_prompt: str, documentation_list: list[str], max_tokens: int = 14000) -> str:
        """Append documentation snippets to the prompt until the approximate token budget is hit."""
        if len(documentation_list) > 0:
            initial_prompt += "\nYou may use the following documentation as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"

            for documentation in documentation_list:
                if Mistral.str_to_approx_token_count(initial_prompt) + Mistral.str_to_approx_token_count(documentation) < max_tokens:
                    initial_prompt += f"{documentation}\n\n"

        return initial_prompt

    @staticmethod
    def add_sql_to_prompt(initial_prompt: str, sql_list: list[str], max_tokens: int = 14000) -> str:
        """Append past question/SQL pairs to the prompt until the approximate token budget is hit."""
        if len(sql_list) > 0:
            initial_prompt += "\nYou may use the following SQL statements as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"

            for question in sql_list:
                if Mistral.str_to_approx_token_count(initial_prompt) + Mistral.str_to_approx_token_count(question["sql"]) < max_tokens:
                    initial_prompt += f"{question['question']}\n{question['sql']}\n\n"

        return initial_prompt

    def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):
        """Build the chat message log used to generate SQL for *question*.

        Past question/SQL pairs are replayed as user/assistant turns (few-shot).
        """
        initial_prompt = "The user provides a question and you provide SQL. You will only respond with SQL code and not with any explanations.\n\nRespond with only SQL code. Do not answer with any explanations -- just the code.\n"

        initial_prompt = Mistral.add_ddl_to_prompt(initial_prompt, ddl_list, max_tokens=14000)

        initial_prompt = Mistral.add_documentation_to_prompt(initial_prompt, doc_list, max_tokens=14000)

        message_log = [Mistral.system_message(initial_prompt)]

        # Skip malformed training rows (None, or missing keys) rather than failing;
        # the original also had a redundant double None check and a debug print here.
        for example in question_sql_list:
            if example is not None and "question" in example and "sql" in example:
                message_log.append(Mistral.user_message(example["question"]))
                message_log.append(Mistral.assistant_message(example["sql"]))

        message_log.append(Mistral.user_message(question))

        return message_log

    def generate_sql(self, question: str, **kwargs) -> str:
        """Generate SQL via the base implementation, then undo Mistral's markdown escaping."""
        sql = super().generate_sql(question, **kwargs)

        # Mistral tends to emit markdown-escaped underscores ("\_") in identifiers.
        sql = sql.replace("\\_", "_")

        return sql

    def submit_prompt(self, prompt, **kwargs) -> str:
        """Send *prompt* (a list of ChatMessage) to the Mistral chat API and return the reply text."""
        chat_response = self.client.chat(
            model=self.model,
            messages=prompt,
        )

        return chat_response.choices[0].message.content