Skip to content
This repository has been archived by the owner on Aug 10, 2023. It is now read-only.

Llama index 0.6.38.post1 #85

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 11 additions & 23 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,29 +162,6 @@ typings/
# Local History for Visual Studio Code
.history/


# Provided default Pycharm Run/Debug Configurations should be tracked by git
# In case of local modifications made by Pycharm, use update-index command
# for each changed file, like this:
# git update-index --assume-unchanged .idea/chat_all_the_docs.iml
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries

# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml

# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
Expand Down Expand Up @@ -338,3 +315,14 @@ delphic/media/*

### Models for Question Answering
cache/*

# https://github.com/cookiecutter/cookiecutter-django/blob/de8759fdbd45ac288b97e050073a5d09f50029db/.gitignore#L211
# Even though the project might be opened and edited
# in any of the JetBrains IDEs, it makes no sense whatsoever
# to 'run' anything within it since any particular cookiecutter
# is declarative by nature.
.idea/

### Local configuration files
/.envs/.local
/frontend/.frontend
4 changes: 2 additions & 2 deletions compose/local/django/celery/worker/start
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ set -o errexit
set -o nounset


#exec watchfiles celery.__main__.main --args '-A config.celery_app worker -l INFO'
exec celery -A config.celery_app worker -l INFO
exec watchfiles --filter python celery.__main__.main --args '-A config.celery_app worker -l INFO'
#exec celery -A config.celery_app worker -l INFO
4 changes: 3 additions & 1 deletion config/api/websockets/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ async def receive(self, text_data):

{query_str}
"""
response = self.index.query(modified_query_str)

query_engine = self.index.as_query_engine()
response = query_engine.query(modified_query_str)

# Format the response as markdown
markdown_response = f"## Response\n\n{response}\n\n"
Expand Down
26 changes: 16 additions & 10 deletions delphic/tasks/index_tasks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import os
import tempfile
Expand All @@ -8,7 +9,7 @@
from django.core.files import File
from langchain import OpenAI
from llama_index import (
GPTSimpleVectorIndex,
GPTVectorStoreIndex,
LLMPredictor,
ServiceContext,
download_loader,
Expand All @@ -23,11 +24,11 @@
@celery_app.task
def create_index(collection_id):
"""
Celery task to create a GPTSimpleVectorIndex for a given Collection object.
Celery task to create a GPTVectorStoreIndex for a given Collection object.

This task takes the ID of a Collection object, retrieves it from the
database along with its related documents, and saves the document files
to a temporary directory. Then, it creates a GPTSimpleVectorIndex using
to a temporary directory. Then, it creates a GPTVectorStoreIndex using
the provided code and saves the index to the Comparison.model FileField.

Args:
Expand Down Expand Up @@ -60,15 +61,18 @@ def create_index(collection_id):
with temp_file_path.open("wb") as f:
f.write(file_data)

# Create the GPTSimpleVectorIndex
SimpleDirectoryReader = download_loader("SimpleDirectoryReader")
# Create the GPTVectorStoreIndex
try:
SimpleDirectoryReader = download_loader("SimpleDirectoryReader")
except Exception as e:
logger.error(f"Error downloading SimpleDirectoryReader: {e}")
raise

loader = SimpleDirectoryReader(
tempdir_path, recursive=True, exclude_hidden=False
)
documents = loader.load_data()
# index = GPTSimpleVectorIndex(documents)

# documents = SimpleDirectoryReader(str(tempdir_path)).load_data()
llm_predictor = LLMPredictor(
llm=OpenAI(
temperature=0,
Expand All @@ -81,11 +85,11 @@ def create_index(collection_id):
)

# build index
index = GPTSimpleVectorIndex.from_documents(
index = GPTVectorStoreIndex.from_documents(
documents, service_context=service_context
)

index_str = index.save_to_string()
index_str = json.dumps(index.storage_context.to_dict())

# Save the index_str to the Comparison.model FileField
with tempfile.NamedTemporaryFile(delete=False) as f:
Expand All @@ -105,7 +109,9 @@ def create_index(collection_id):
return True

except Exception as e:
logger.error(f"Error creating index for collection {collection_id}: {e}")
logger.error(
f"{type(e).__name__} creating index for collection {collection_id}: {e}"
)
collection.status = CollectionStatus.ERROR
collection.save()

Expand Down
32 changes: 12 additions & 20 deletions delphic/utils/collections.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import json
import logging
import textwrap
from pathlib import Path

from django.conf import settings
from langchain import OpenAI
from llama_index import GPTSimpleVectorIndex, LLMPredictor, ServiceContext
from llama_index import StorageContext, load_index_from_storage
from llama_index.indices.base import BaseIndex

from delphic.indexes.models import Collection

Expand All @@ -27,22 +28,22 @@ def format_source(source):
return formatted_source


async def load_collection_model(collection_id: str | int) -> GPTSimpleVectorIndex:
async def load_collection_model(collection_id: str | int) -> "BaseIndex":
"""
Load the Collection model from cache or the database, and return the index.

Args:
collection_id (Union[str, int]): The ID of the Collection model instance.

Returns:
GPTSimpleVectorIndex: The loaded index.
BaseIndex: The loaded index.

This function performs the following steps:
1. Retrieve the Collection object with the given collection_id.
2. Check if a JSON file with the name '/cache/model_{collection_id}.json' exists.
3. If the JSON file doesn't exist, load the JSON from the Collection.model FileField and save it to
3. If the JSON file doesn't exist, load the JSON from the `Collection.model` FileField and save it to
'/cache/model_{collection_id}.json'.
4. Call GPTSimpleVectorIndex.load_from_disk with the cache_file_path.
4. Build a StorageContext from the cached JSON and call load_index_from_storage.
"""
# Retrieve the Collection object
collection = await Collection.objects.aget(id=collection_id)
Expand All @@ -61,21 +62,12 @@ async def load_collection_model(collection_id: str | int) -> GPTSimpleVectorInde
with cache_file_path.open("w+", encoding="utf-8") as cache_file:
cache_file.write(model_file.read().decode("utf-8"))

# define LLM
logger.info(
f"load_collection_model() - Setup service context with tokens {settings.MAX_TOKENS} and "
f"model {settings.MODEL_NAME}"
)
llm_predictor = LLMPredictor(
llm=OpenAI(temperature=0, model_name="text-davinci-003", max_tokens=512)
)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

# Call GPTSimpleVectorIndex.load_from_disk
# Rebuild the index from the cached storage context via load_index_from_storage
logger.info("load_collection_model() - Load llama index")
index = GPTSimpleVectorIndex.load_from_disk(
cache_file_path, service_context=service_context
)
with cache_file_path.open("r") as cache_file:
storage_context = StorageContext.from_dict(json.load(cache_file))
index = load_index_from_storage(storage_context)

logger.info(
"load_collection_model() - Llamaindex loaded and ready for query..."
)
Expand Down
2 changes: 1 addition & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@ channels_redis

# NLP-Related
# ------------------------------------------------------------------------------
llama_index==0.5.25 # https://github.com/jerryjliu/llama_index
llama_index==0.6.38.post1 # https://github.com/jerryjliu/llama_index
PyPDF2==3.* # https://pypdf2.readthedocs.io/en/latest/
docx2txt==0.8