Intrigued by RAG's potential but lost in the labyrinth of code? Fear not, aspiring linguist! Buckle up for a playful Python playground where you can tinker with RAG concepts right on your local machine. Let's ditch the deep learning deployment and dive into an educational exploration of RAG, one command line at a time! ragfmk is a simple framework to implement and play with RAG.
This project aims to run locally (that is, on your laptop, without GPUs) and leverages:
- Ollama (https://ollama.com/) for running LLMs locally
- Sentence-transformers (https://pypi.org/project/sentence-transformers/) for the embeddings management
- potentially llamaParse from llamaindex (https://docs.llamaindex.ai/) can be used.
- langchain (https://www.langchain.com/) for chunking (semantic or character)
- Meta FAISS for similarity search (also enables storing and loading indexes)
- ChromaDB for storing and searching into the vector store
- PyMuPDF by default to read and convert PDF content
Several objects are provided to manage the main RAG features and characteristics:
- rag: the main interface for managing all needed requests.
- IDocument: manages the document reading and loading (pdf or direct content)
- IChunks: manages the chunks list
- IEmbeddings: Manages the vector and data embeddings
- INearest: Manages the k nearest neighbors retrieved by the similarity search engine
- IPrompt: Manages Prompt templating and simple prompt
Two PDF reading methods are supported:
- via PyMuPDF
- via Llamaparse
Two chunking methods are supported:
- Character chunking (langchain)
- Semantic chunking (langchain)
Two vector stores are currently supported:
- FAISS: search + load and store indexes
- ChromaDB
Two Embeddings methods are supported:
- via HF Sentence Transformer (the model can be changed)
- via Ollama Embeddings Models (the model can be changed)
- Install ollama (https://ollama.com/)
- Run ollama in the command line and pull at least one model. tinydolphin is a good choice, as it is a very small model and can therefore run on a simple laptop without much latency.
- Install ragfmk by using pip:
pip install [--force-reinstall] wheel file
or via pypi:
pip install ragfmk
Note: Some environment variables may need to be set:
- If you plan to use llamaParse, the llamaindex key (generated on the web site: https://cloud.llamaindex.ai/login) must be set in the LLAMAINDEX_API_KEY environment variable
- If a specific log file must be used (by default the programs create the ragcli.log file in the working directory), set the environment variable RAGFMK_LOGFILE to the desired path and file name.
from ragfmk.rag import rag
def ragcall(filename, mode, output):
    """Read a PDF through the rag interface and optionally save the result.

    ``filename`` and ``mode`` are forwarded unchanged to ``rag.readPDF``
    (presumably ``mode`` selects the PDF reading method -- confirm against
    the rag class). When ``output`` is non-empty, the object returned by
    ``readPDF`` is saved to it.

    Returns:
        A 2-tuple: the ``content`` of the object returned by ``readPDF``
        and the result of ``milestones.getFullJSON()``.
    """
    engine = rag()
    pdf_doc = engine.readPDF(filename, mode)

    # Persist only when a destination was actually given.
    wants_save = len(output) != 0
    if wants_save:
        pdf_doc.save(output)

    text = pdf_doc.content
    trace = engine.milestones.getFullJSON()
    return text, trace
from ragfmk.rag import rag
def ragcall(model, url, prompt, temperature):
    """Send a prompt to an LLM through the rag interface.

    The arguments are passed to ``rag.promptLLM`` in the order
    ``(prompt, url, model, temperature)``, which differs from this
    function's own parameter order.

    Returns:
        A 2-tuple: whatever ``promptLLM`` returned and the result of
        ``milestones.getFullJSON()``.
    """
    engine = rag()
    answer = engine.promptLLM(prompt, url, model, temperature)
    trace = engine.milestones.getFullJSON()
    return answer, trace
from ragfmk.rag import rag
from ragfmk.elements.wrappers.document import document
def chunking(sep, mode, csize, coverlap, text, filename, output):
    """Split a document into chunks, by character or semantically.

    The document comes from ``text`` when it is non-empty; otherwise it is
    loaded from ``filename``. ``mode == 0`` calls
    ``rag.charChunk(doc, sep, csize, coverlap)``; any other value calls
    ``rag.semChunk(doc)``. When ``output`` is non-empty the chunks are
    saved to it.

    Returns:
        A 3-tuple: the chunks' ``size``, their ``jsonContent`` and the
        result of ``milestones.getFullJSON()``.
    """
    engine = rag()
    source = document()

    # Inline text wins over the file when both are supplied.
    if len(text) == 0:
        source.load(filename)
    else:
        source.content = text

    pieces = (
        engine.charChunk(source, sep, csize, coverlap)
        if mode == 0
        else engine.semChunk(source)
    )

    if len(output) != 0:
        pieces.save(output)

    count = pieces.size
    payload = pieces.jsonContent
    trace = engine.milestones.getFullJSON()
    return count, payload, trace
from ragfmk.rag import rag
from ragfmk.elements.wrappers.chunks import chunks
def ragcall(text, output):
    """Create embeddings for a single piece of text.

    ``text`` is added as the only entry of a fresh ``chunks`` object,
    which is then handed to ``rag.createEmbeddings``. When ``output`` is
    non-empty the embeddings are saved to it.

    Returns:
        A 3-tuple: the embeddings' ``size``, their ``jsonContent`` and the
        result of ``milestones.getFullJSON()``.
    """
    engine = rag()
    single_chunk = chunks()
    single_chunk.add(text)

    vectors = engine.createEmbeddings(single_chunk)
    if len(output) != 0:
        vectors.save(output)

    count = vectors.size
    payload = vectors.jsonContent
    trace = engine.milestones.getFullJSON()
    return count, payload, trace
from ragfmk.rag import rag
from ragfmk.elements.wrappers.chunks import chunks
def ragcall(filename, content, output):
    """Create embeddings for a list of chunks.

    The chunks come from ``content`` (assigned to ``jsonContent``) when it
    is non-empty; otherwise they are loaded from ``filename``. The chunks
    are passed to ``rag.createEmbeddings`` and, when ``output`` is
    non-empty, the embeddings are saved to it.

    Returns:
        A 3-tuple: the embeddings' ``size``, their ``jsonContent`` and the
        result of ``milestones.getFullJSON()``.
    """
    engine = rag()
    chunk_list = chunks()

    # Inline content wins over the file when both are supplied.
    if len(content) == 0:
        chunk_list.load(filename)
    else:
        chunk_list.jsonContent = content

    vectors = engine.createEmbeddings(chunk_list)
    if len(output) != 0:
        vectors.save(output)

    count = vectors.size
    payload = vectors.jsonContent
    trace = engine.milestones.getFullJSON()
    return count, payload, trace
from ragfmk.ragFAISS import ragFAISS
from ragfmk.elements.embeddings.stEmbeddings import stEmbeddings
def ragcall(filename, content, idx, store):
    """Add embeddings to a FAISS index and save that index.

    The index name is set to ``idx`` first. The embeddings come from
    ``content`` when it is non-empty; otherwise they are loaded from
    ``filename``. After they are added, the storage path is set to
    ``store`` and ``saveIndex()`` is called.

    Returns:
        The result of ``milestones.getFullJSON()``.
    """
    engine = ragFAISS()
    vectors = stEmbeddings()
    engine.indexName = idx

    if len(content) == 0:
        vectors.load(filename=filename)
    else:
        # NOTE(review): the other examples assign inline data to
        # ``jsonContent``; this one uses ``content`` -- confirm which
        # attribute stEmbeddings expects here.
        vectors.content = content

    engine.addEmbeddings(vectors)
    engine.storagePath = store
    engine.saveIndex()
    return engine.milestones.getFullJSON()
from ragfmk.ragFAISS import ragFAISS
from ragfmk.elements.embeddings.stEmbeddings import stEmbeddings
def ragcall(filename, content, idx, store, k, output):
    """Search a FAISS index with an embedded prompt.

    The index name and storage path are set to ``idx`` and ``store``, then
    ``initSearchEngine()`` is called. The prompt embeddings come from
    ``content`` (assigned to ``jsonContent``) when it is non-empty;
    otherwise they are loaded from ``filename``. The search itself is
    ``processSearch(k, prompt_embeddings)``. When ``output`` is non-empty
    the search result is saved to it.

    Returns:
        A 2-tuple: the search result's ``jsonContent`` and the result of
        ``milestones.getFullJSON()``.
    """
    engine = ragFAISS()
    query_vectors = stEmbeddings()

    # Configure the index location before initialising the search engine.
    engine.indexName = idx
    engine.storagePath = store
    engine.initSearchEngine()

    if len(content) == 0:
        query_vectors.load(filename=filename)
    else:
        query_vectors.jsonContent = content

    matches = engine.processSearch(k, query_vectors)
    if len(output) != 0:
        matches.save(output)

    payload = matches.jsonContent
    trace = engine.milestones.getFullJSON()
    return payload, trace
from ragfmk.ragFAISS import ragFAISS
from ragfmk.elements.embeddings.stEmbeddings import stEmbeddings
def ragcall(prompt, k, output, contexts):
    """Run a FAISS search over embeddings supplied directly by the caller.

    ``prompt`` and ``contexts`` are each assigned to the ``jsonContent``
    of their own ``stEmbeddings`` object. The context embeddings are added
    to the engine, then ``processSearch(k, prompt_embeddings)`` is called.
    When ``output`` is non-empty the search result is saved to it.

    Returns:
        A 2-tuple: the search result's ``jsonContent`` and the result of
        ``milestones.getFullJSON()``.
    """
    engine = ragFAISS()
    context_vectors = stEmbeddings()
    query_vectors = stEmbeddings()
    engine.initSearchEngine()

    query_vectors.jsonContent = prompt
    context_vectors.jsonContent = contexts
    engine.addEmbeddings(context_vectors)

    matches = engine.processSearch(k, query_vectors)
    if len(output) != 0:
        matches.save(output)

    payload = matches.jsonContent
    trace = engine.milestones.getFullJSON()
    return payload, trace
from ragfmk.rag import rag
from ragfmk.elements.wrappers.nearest import nearest
def ragcall(question, nearestfile, content):
    """Build a prompt from a question and a set of nearest chunks.

    The nearest chunks come from ``content`` (assigned to ``jsonContent``)
    when it is non-empty; otherwise they are loaded from ``nearestfile``.
    The prompt is built with ``rag.buildPrompt(question, nearest_chunks)``.

    Returns:
        A 2-tuple: whatever ``buildPrompt`` returned and the result of
        ``milestones.getFullJSON()``.
    """
    engine = rag()
    neighbours = nearest()

    # Inline content wins over the file when both are supplied.
    if len(content) == 0:
        neighbours.load(nearestfile)
    else:
        neighbours.jsonContent = content

    built_prompt = engine.buildPrompt(question, neighbours)
    trace = engine.milestones.getFullJSON()
    return built_prompt, trace