Updated Packages to latest version, added Pinecone Name Space to .env… #366

Open · wants to merge 4 commits into base: main · Changes from all commits
6 changes: 2 additions & 4 deletions .env.example
@@ -1,8 +1,6 @@
OPENAI_API_KEY=

# Update these with your pinecone details from your dashboard.
# PINECONE_INDEX_NAME is in the indexes tab under "index name" in blue
# PINECONE_ENVIRONMENT is in indexes tab under "Environment". Example: "us-east1-gcp"
PINECONE_API_KEY=
PINECONE_ENVIRONMENT=
PINECONE_INDEX_NAME=
PINECONE_NAME_SPACE=
COHERE_API_KEY=
2 changes: 2 additions & 0 deletions .gitignore
@@ -38,3 +38,5 @@ next-env.d.ts

#Notion_db
/Notion_DB

/docs
6 changes: 4 additions & 2 deletions config/pinecone.ts
@@ -5,9 +5,11 @@
if (!process.env.PINECONE_INDEX_NAME) {
throw new Error('Missing Pinecone index name in .env file');
}
if (!process.env.PINECONE_NAME_SPACE) {
throw new Error('Missing Pinecone name space in .env file');
}

const PINECONE_INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? '';

const PINECONE_NAME_SPACE = 'pdf-test'; //namespace is optional for your vectors
const PINECONE_NAME_SPACE = process.env.PINECONE_NAME_SPACE ?? '';

export { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE };
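
The namespace is no longer hard-coded to 'pdf-test'; it is validated and read from the environment like the index name. For context, a minimal sketch of how these two exports are consumed, mirroring the pattern already used in pages/api/chat.ts and the ingest scripts below (assumes an async context):

import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
import { PineconeStore } from 'langchain/vectorstores/pinecone';
import { pinecone } from '@/utils/pinecone-client';
import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from '@/config/pinecone';

const index = pinecone.Index(PINECONE_INDEX_NAME);
// The namespace scopes reads and writes, so several document sets can share one index.
const vectorStore = await PineconeStore.fromExistingIndex(new OpenAIEmbeddings(), {
  pineconeIndex: index,
  textKey: 'text',
  namespace: PINECONE_NAME_SPACE,
});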
7 changes: 7 additions & 0 deletions config/prompt_end.js
@@ -0,0 +1,7 @@
export const prompt_end = `I want you to behave as a spokesperson skilled in repartee, a language model providing explanatory statements on behalf of an association,
just like a speaker of the DEMOKRASİ VE ATILIM PARTİSİ, abbreviated as DEVA, as if you were the spokesperson of the
association. You should summarize and understand the text I will give you, and respond like a spokesperson of the
association. If any negative situation exists, just make it up and be positive; if the phrase is completely negative,
just reply with a positive answer. Your response should be in, and only in, Turkish.
For example, "*As DEVA Party, we have done this, and we have these plans. We will implement them. We are planning these,
This will be like that* """"Reply in Turkish only"""`;
1 change: 1 addition & 0 deletions config/prompt_front.js
@@ -0,0 +1 @@
export const prompt_front = `You are a helpful AI assistant that can speak and reply in Turkish.`;
68 changes: 68 additions & 0 deletions config/prompts.js
@@ -0,0 +1,68 @@
import { prompt_front } from './prompt_front.js';
import { prompt_end } from './prompt_end.js';
const QA_PROMPT1 = `${prompt_front} Use the following pieces of context to answer the question at the end.
If you don't know the answer, try to be optimistic and return a positive response.
If the question is not related to the context, try to understand the context and return the best available answer.
Use as much detail as possible when responding. Try to stay focused on the context.
You have the ability to make subjective judgments.
${prompt_end}
{context}

Question: """{question}"""
Helpful Turkish answer in markdown:`;

const QA_PROMPT3 = `You are a helpful legal assistant who can find supreme court decisions
and help the user identify which decision is most relevant to the question.
You can speak and reply in Turkish. Use the following pieces of context to answer the question at the end.
If you don't know the answer, try to be optimistic and return a positive response.
If the question is not related to the context, try to understand the context and return the best available answer.
Use as much detail as possible when responding. Try to stay focused on the context.
I want you to behave as a legal assistant. You will give detailed answers, not short sentences,
about high court legal decisions, summarizing the context.
You should summarize and understand the text I will give you, and respond like a legal assistant versed in
Turkish law. """"Reply in Turkish only"""
{context}

Question: """{question}"""
Helpful and detailed long Turkish answer in markdown:`;

const QA_PROMPT4 = `Senden Türk mahkemelerine yönelik dilekçeler yazan bir hukuk asistanı olmanı istiyorum.
Sana vereceğim metin(context) içerisinden daha önce yazılmış dilekçeleri analiz ederek, konuya uygun kısımları anlayıp
dilekçeyi detaylandırmaya ve benzer ikna edici cümleler kurmaya çalış.
Sana olaya ilişkin cümleler, yargılar, deliller ve İsimler verebilirim. Bağlamdan kopmadan, yeni şeyler eklemeden,
detaylı dilekçe metinleri kurmaya çalış. Vereceğim örnek metinler sana yol gösterecektir. Cevapların türkçe olmalıdır.
Yanıtlarken mümkün olduğunca detaylı olun ve odakta kalın. Vereceğim context alakalı olmasa bile
soruya ya da information'a uygun bir dilekçe yazmaya çalış. İsimleri geçirmeyip yerine [ADSOYAD] yazmalısın. İsimler, şehir ve özel kurum isimleri geçmemeli;
yerine [KURUM], [ŞEHİR], [TARİH] gibi yer tutucular koy.
""""Reply in Turkish only and return me only petition text"""
{context}
Information: """{question}"""
Helpful and detailed long Turkish answer in markdown:`;

const QA_PROMPT = `Senden verilen metindeki kişi, olay ve ilişkileri tespit eden bir analiz uzmanı olarak davranmanı istiyorum.
Sana vereceğim metin(context) içerisinden kişi isimlerini, olayları, kişilerin yaşadıklarını, davanın taraflarını, vekilleri, delilleri anlamlandıracaksın.
Sana olaya ilişkin cümleler, yargılar, deliller ve İsimler verebilirim. Kişiler, olaylar, deliller ve yargılar hakkında sorulara cevap vereceksin.
Cevapların Türkçe olmalıdır.
Yanıtlarken mümkün olduğunca detaylı olun ve odakta kalın. Vereceğim metinde soruya uygun cevap yoksa, bulamadığını kibarca belirt, ancak bulduğun ufak bilgi parçalarını da ekle.

""""Reply in Turkish only""" İşte Metin:

{context}
question: """{question}"""
Helpful and detailed long Turkish answer in markdown:`;

const QA_PROMPT6 = `Kanunu analiz ederek ilgili maddeleri bulan, yorum yapan bir botsun.
Sana vereceğim metin(context) içerisinden kanun ile ilgili çıkarımları, kesin süreleri, varsa diğer şartları analiz edip bilgi vereceksin.
Vereceğim metindeki en alakalı yerleri bulmaya çalışarak hata yapmadan cevap ver.
Cevapların Türkçe olmalıdır.
Yanıtlarken mümkün olduğunca odakta kalın.
Normal sohbet sorularına da kibar ve nazikçe cevap verin, kanunla alakası olmasa da normal sohbetsel konuşmalar yapın.
Vereceğim metinde soruya uygun cevap yoksa, bulamadığını kibarca belirt, ancak bulduğun ufak bilgi parçalarını da ekle.

""""Reply in Turkish only""" İşte Metin:

{context}
question: """{question}"""
Helpful and detailed long Turkish answer in markdown:`;

export { QA_PROMPT, QA_PROMPT1, QA_PROMPT3, QA_PROMPT6, QA_PROMPT4 };
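
All five templates keep the {context} and {question} placeholders that LangChain fills in at query time. A hypothetical wiring sketch, assuming the app's makeChain (not part of this diff) follows the usual ConversationalRetrievalQAChain pattern:

import { OpenAI } from 'langchain/llms/openai';
import { ConversationalRetrievalQAChain } from 'langchain/chains';
import { PineconeStore } from 'langchain/vectorstores/pinecone';
import { QA_PROMPT } from './prompts.js';

// Hypothetical factory; the real makeChain may differ.
export const makeChain = (vectorStore: PineconeStore) =>
  ConversationalRetrievalQAChain.fromLLM(
    new OpenAI({ temperature: 0 }),
    vectorStore.asRetriever(),
    {
      qaTemplate: QA_PROMPT, // any of the exported templates could be swapped in here
      returnSourceDocuments: true,
    },
  );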
9 changes: 9 additions & 0 deletions config/settings.ts
@@ -0,0 +1,9 @@

if (!process.env.EMBEDDING_TYPE) {
throw new Error('Missing EMBEDDING_TYPE in .env file');
}

const EMBEDDING_TYPE = process.env.EMBEDDING_TYPE ?? 'openai'; // 'openai' or 'cohere'
console.log(EMBEDDING_TYPE);

export { EMBEDDING_TYPE };
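
EMBEDDING_TYPE selects the embedding backend for the whole app. The same switch appears in pages/api/chat.ts and scripts/ingest-data.ts below; shown here in isolation:

import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
import { CohereEmbeddings } from 'langchain/embeddings/cohere';
import { EMBEDDING_TYPE } from '@/config/settings';

// 'openai' uses OpenAI's default embedding model; any other value falls through to
// Cohere's multilingual model, a better fit for the Turkish-language corpus here.
const embeddings =
  EMBEDDING_TYPE === 'openai'
    ? new OpenAIEmbeddings()
    : new CohereEmbeddings({ modelName: 'embed-multilingual-v2.0' });

Note that the same backend has to be used at ingest time and at query time; otherwise the stored vectors and the query vectors live in different embedding spaces and similarity search silently degrades.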
44 changes: 23 additions & 21 deletions package.json
@@ -12,36 +12,38 @@
"type-check": "tsc --noEmit",
"lint": "eslint --ignore-path .gitignore \"**/*.+(ts|js|tsx)\"",
"format": "prettier --ignore-path .gitignore \"**/*.+(ts|js|tsx)\" --write",
"ingest": "tsx -r dotenv/config scripts/ingest-data.ts"
"ingest": "tsx -r dotenv/config scripts/ingest-data.ts",
"ingest-cohere": "tsx -r dotenv/config scripts/ingest-data-cohere.ts"
},
"dependencies": {
"@microsoft/fetch-event-source": "^2.0.1",
"@pinecone-database/pinecone": "0.0.14",
"@radix-ui/react-accordion": "^1.1.1",
"@pinecone-database/pinecone": "0.1.6",
"@radix-ui/react-accordion": "^1.1.2",
"clsx": "^1.2.1",
"dotenv": "^16.0.3",
"langchain": "0.0.82",
"lucide-react": "^0.125.0",
"next": "13.2.3",
"cohere-ai": "^6.2.2",
"dotenv": "^16.3.1",
"langchain": "0.0.101",
"lucide-react": "^0.257.0",
"next": "13.4.7",
"pdf-parse": "1.1.1",
"react": "18.2.0",
"react-dom": "18.2.0",
"react-markdown": "^8.0.5",
"tailwind-merge": "^1.10.0"
"react-markdown": "^8.0.7",
"tailwind-merge": "^1.13.2"
},
"devDependencies": {
"@types/node": "^18.14.6",
"@types/react": "^18.0.28",
"@types/react-dom": "^18.0.11",
"@typescript-eslint/parser": "^5.54.0",
"autoprefixer": "^10.4.13",
"eslint": "8.35.0",
"eslint-config-next": "13.2.3",
"postcss": "^8.4.21",
"prettier": "^2.8.4",
"tailwindcss": "^3.2.7",
"tsx": "^3.12.3",
"typescript": "^4.9.5"
"@types/node": "^20.3.3",
"@types/react": "^18.2.14",
"@types/react-dom": "^18.2.6",
"@typescript-eslint/parser": "^5.60.1",
"autoprefixer": "^10.4.14",
"eslint": "8.44.0",
"eslint-config-next": "13.4.7",
"postcss": "^8.4.24",
"prettier": "^2.8.8",
"tailwindcss": "^3.3.2",
"tsx": "^3.12.7",
"typescript": "^5.1.6"
},
"keywords": [
"starter",
33 changes: 26 additions & 7 deletions pages/api/chat.ts
@@ -1,17 +1,33 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
import { CohereEmbeddings } from 'langchain/embeddings/cohere';
import { PineconeStore } from 'langchain/vectorstores/pinecone';
import { makeChain } from '@/utils/makechain';
import {BaseChatMessage, HumanChatMessage, AIChatMessage} from 'langchain/schema';
import { pinecone } from '@/utils/pinecone-client';
import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from '@/config/pinecone';
import { EMBEDDING_TYPE} from '@/config/settings';

export default async function handler(
req: NextApiRequest,
res: NextApiResponse,
) {
const { question, history } = req.body;

console.log('question', question);
let histories: BaseChatMessage[] = [];

history.forEach((hist: { type: string; data: string }) => {
  if (hist['type'] === 'human') {
    // rebuild the stored human turn from its own text, not the current question
    histories.push(new HumanChatMessage(hist['data']));
  } else if (hist['type'] === 'ai') {
    histories.push(new AIChatMessage(hist['data']));
  }
});

console.log('question:', question);

//only accept post requests
if (req.method !== 'POST') {
@@ -22,15 +38,18 @@ export default async function handler(
if (!question) {
return res.status(400).json({ message: 'No question in the request' });
}
var sanitizedQuestion = question.trim().replaceAll('\n', ' ');

// OpenAI recommends replacing newlines with spaces for best results
const sanitizedQuestion = question.trim().replaceAll('\n', ' ');



try {
const index = pinecone.Index(PINECONE_INDEX_NAME);

/* create vectorstore*/
const vectorStore = await PineconeStore.fromExistingIndex(
new OpenAIEmbeddings({}),
EMBEDDING_TYPE=="openai"? new OpenAIEmbeddings({}):new CohereEmbeddings({modelName:"embed-multilingual-v2.0"}),
{
pineconeIndex: index,
textKey: 'text',
@@ -43,13 +62,13 @@ export default async function handler(
//Ask a question using chat history
const response = await chain.call({
question: sanitizedQuestion,
chat_history: history || [],
chat_history: histories || [],
});

console.log('response', response);
res.status(200).json(response);
} catch (error: any) {
console.log('error', error);
console.log('error:', error);
res.status(500).json({ error: error.message || 'Something went wrong' });
}
}
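
The handler now rebuilds typed LangChain messages from plain { type, data } objects in the request body. A sketch of a client call matching what the handler reads (hypothetical values, inside an async context):

// Hypothetical client-side request; the field names mirror hist['type'] and hist['data'] above.
await fetch('/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    question: 'Dernek bu konuda ne planlıyor?',
    history: [
      { type: 'human', data: 'Merhaba' },
      { type: 'ai', data: 'Merhaba, size nasıl yardımcı olabilirim?' },
    ],
  }),
});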
8 changes: 5 additions & 3 deletions pages/index.tsx
@@ -6,6 +6,7 @@ import Image from 'next/image';
import ReactMarkdown from 'react-markdown';
import LoadingDots from '@/components/ui/LoadingDots';
import { Document } from 'langchain/document';
import {BaseChatMessage, HumanChatMessage, AIChatMessage} from 'langchain/schema';
import {
Accordion,
AccordionContent,
@@ -20,7 +21,7 @@ export default function Home() {
const [messageState, setMessageState] = useState<{
messages: Message[];
pending?: string;
history: [string, string][];
history: BaseChatMessage[];
pendingSourceDocs?: Document[];
}>({
messages: [
@@ -81,7 +82,8 @@ export default function Home() {
});
const data = await response.json();
console.log('data', data);

let req: BaseChatMessage = new HumanChatMessage(question);
let respond: BaseChatMessage = new AIChatMessage(data.text);
if (data.error) {
setError(data.error);
} else {
@@ -95,7 +97,7 @@ sourceDocs: data.sourceDocuments,
sourceDocs: data.sourceDocuments,
},
],
history: [...state.history, [question, data.text]],
history: [...state.history, req, respond],
}));
}
console.log('messageState', messageState);
64 changes: 64 additions & 0 deletions scripts/ingest-data-cohere.ts
@@ -0,0 +1,64 @@
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
import { CohereEmbeddings } from 'langchain/embeddings/cohere';
import { PineconeStore } from 'langchain/vectorstores/pinecone';
import { pinecone } from '@/utils/pinecone-client';
import { CustomPDFLoader } from '@/utils/customPDFLoader';
import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from '@/config/pinecone';
import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';
import { TextLoader } from "langchain/document_loaders/fs/text";
import { DocxLoader } from "langchain/document_loaders/fs/docx";

/* Name of directory to retrieve your files from */
const filePath = 'docs';

export const run = async () => {
try {
/*load raw docs from the all files in the directory */
const directoryLoader = new DirectoryLoader(filePath, {
'.pdf': (path) => new CustomPDFLoader(path),
'.txt': (path) => new TextLoader(path),
'.docx': (path) => new DocxLoader(path),
});

// const loader = new PDFLoader(filePath);
const rawDocs = await directoryLoader.load();

/* Split text into chunks */
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: 3000,
chunkOverlap: 250,
});

const docs = await textSplitter.splitDocuments(rawDocs);
docs.forEach((doc) => {
  doc.pageContent = doc.pageContent.replace(/\n/g, ' '); // global flag: replace every newline, not just the first
});
console.log('split docs');

console.log('creating vector store... namespace:', PINECONE_NAME_SPACE);
/*create and store the embeddings in the vectorStore*/
//const embeddings = new OpenAIEmbeddings();
const embeddings = new CohereEmbeddings({modelName:"embed-multilingual-v2.0"});
const index = pinecone.Index(PINECONE_INDEX_NAME); //change to your own index name

//embed the PDF documents
await PineconeStore.fromDocuments(docs, embeddings, {
pineconeIndex: index,
namespace: PINECONE_NAME_SPACE,
textKey: 'text',
});
} catch (error) {
console.log('error', error);
throw new Error('Failed to ingest your data');
}
};

(async () => {
let date = new Date()
console.log(date.toLocaleString('en-US'));
await run();
console.log('ingestion complete');
let date2 = new Date()
console.log(date2.toLocaleString('en-US'));
})();
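
With the matching "ingest-cohere" script added to package.json, this can presumably be run as npm run ingest-cohere once the source files are placed in the docs folder (which the .gitignore change above now keeps out of version control).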
13 changes: 9 additions & 4 deletions scripts/ingest-data.ts
@@ -1,11 +1,14 @@
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
import { CohereEmbeddings } from 'langchain/embeddings/cohere';
import { PineconeStore } from 'langchain/vectorstores/pinecone';
import { pinecone } from '@/utils/pinecone-client';
import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
import { CustomPDFLoader } from '@/utils/customPDFLoader';
import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from '@/config/pinecone';
import { EMBEDDING_TYPE} from '@/config/settings';
import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';

import { TextLoader } from "langchain/document_loaders/fs/text";
import { DocxLoader } from "langchain/document_loaders/fs/docx";
/* Name of directory to retrieve your files from
Make sure to add your PDF files inside the 'docs' folder
*/
@@ -15,7 +18,9 @@ export const run = async () => {
try {
/*load raw docs from the all files in the directory */
const directoryLoader = new DirectoryLoader(filePath, {
'.pdf': (path) => new PDFLoader(path),
'.pdf': (path) => new CustomPDFLoader(path),
'.txt': (path) => new TextLoader(path),
'.docx': (path) => new DocxLoader(path),
});

// const loader = new PDFLoader(filePath);
@@ -32,7 +37,7 @@

console.log('creating vector store...');
/*create and store the embeddings in the vectorStore*/
const embeddings = new OpenAIEmbeddings();
const embeddings = EMBEDDING_TYPE === 'openai' ? new OpenAIEmbeddings() : new CohereEmbeddings({ modelName: 'embed-multilingual-v2.0' });
const index = pinecone.Index(PINECONE_INDEX_NAME); //change to your own index name

//embed the PDF documents
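
The standard npm run ingest now honors the same switch: EMBEDDING_TYPE=openai embeds via OpenAIEmbeddings, while any other value (e.g. cohere) uses Cohere's embed-multilingual-v2.0, matching what pages/api/chat.ts selects at query time.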