From d85fa97783ff57a7ee9d595734f675b0ab029d04 Mon Sep 17 00:00:00 2001 From: kevin Date: Sat, 7 Oct 2023 17:04:59 +0100 Subject: [PATCH 1/6] Added feature to save the user questions and model answers to CSV based on flag --- 3.20.2 | 1 + utils.py | 0 2 files changed, 1 insertion(+) create mode 100644 3.20.2 create mode 100644 utils.py diff --git a/3.20.2 b/3.20.2 new file mode 100644 index 00000000..8377d3c7 --- /dev/null +++ b/3.20.2 @@ -0,0 +1 @@ +Requirement already satisfied: protobuf in c:\users\kevin\anaconda3\lib\site-packages (4.24.4) diff --git a/utils.py b/utils.py new file mode 100644 index 00000000..e69de29b From ac9418ee2227590e47135769ef42ba12d259e757 Mon Sep 17 00:00:00 2001 From: kevin Date: Sat, 7 Oct 2023 17:18:54 +0100 Subject: [PATCH 2/6] Added feature to save Q&A to CSV based on user flag --- run_localGPT.py | 12 +++++++++++- utils.py | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/run_localGPT.py b/run_localGPT.py index a1816131..0e94ac4a 100644 --- a/run_localGPT.py +++ b/run_localGPT.py @@ -205,7 +205,13 @@ def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"): ), help="model type, llama, mistral or non_llama", ) -def main(device_type, show_sources, use_history, model_type): +@click.option( + "--save_q&a", + is_flag=True, + help="whether to save Q&A pairs to a CSV file (Default is False)", +) + +def main(device_type, show_sources, use_history, model_type, save_qa): """ Implements the main information retrieval task for a localGPT. 
@@ -257,6 +263,10 @@ def main(device_type, show_sources, use_history, model_type): print("\n> " + document.metadata["source"] + ":") print(document.page_content) print("----------------------------------SOURCE DOCUMENTS---------------------------") + + # Log the Q&A to CSV only if save_qa is True + if save_qa: + log_to_csv(query, answer) if __name__ == "__main__": diff --git a/utils.py b/utils.py index e69de29b..bf6db482 100644 --- a/utils.py +++ b/utils.py @@ -0,0 +1,15 @@ +import os +import csv +from datetime import datetime + +def log_to_csv(query, answer): + filename = "qa_log.csv" + + # Check if the file doesn't exist, to write headers + write_header = not os.path.exists(filename) + + with open(filename, mode='a', newline='', encoding='utf-8') as file: + writer = csv.writer(file) + if write_header: + writer.writerow(["Timestamp", "Question", "Answer"]) + writer.writerow([datetime.now(), query, answer]) \ No newline at end of file From b0d76516ac5537bebf8e23c5425fc4e5c70cf1b6 Mon Sep 17 00:00:00 2001 From: kevin Date: Sun, 8 Oct 2023 13:16:41 +0100 Subject: [PATCH 3/6] fixing unrecognized character & --- run_localGPT.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_localGPT.py b/run_localGPT.py index 0e94ac4a..dbd66225 100644 --- a/run_localGPT.py +++ b/run_localGPT.py @@ -206,7 +206,7 @@ def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"): help="model type, llama, mistral or non_llama", ) @click.option( - "--save_q&a", + "--save_qa", is_flag=True, help="whether to save Q&A pairs to a CSV file (Default is False)", ) From 46c9559c7e6047009148ddbcb17d3fdeb1362059 Mon Sep 17 00:00:00 2001 From: kevin Date: Sun, 22 Oct 2023 20:27:12 +0100 Subject: [PATCH 4/6] fixing no folder error and import --- run_localGPT.py | 3 ++- utils.py | 30 ++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/run_localGPT.py b/run_localGPT.py index dbd66225..50fa12fd 100644 --- a/run_localGPT.py 
+++ b/run_localGPT.py @@ -2,6 +2,7 @@ import logging import click import torch +import utils from langchain.chains import RetrievalQA from langchain.embeddings import HuggingFaceInstructEmbeddings from langchain.llms import HuggingFacePipeline @@ -266,7 +267,7 @@ def main(device_type, show_sources, use_history, model_type, save_qa): # Log the Q&A to CSV only if save_qa is True if save_qa: - log_to_csv(query, answer) + utils.log_to_csv(query, answer) if __name__ == "__main__": diff --git a/utils.py b/utils.py index bf6db482..0440d214 100644 --- a/utils.py +++ b/utils.py @@ -2,14 +2,24 @@ import csv from datetime import datetime -def log_to_csv(query, answer): - filename = "qa_log.csv" - - # Check if the file doesn't exist, to write headers - write_header = not os.path.exists(filename) - - with open(filename, mode='a', newline='', encoding='utf-8') as file: +def log_to_csv(question, answer): + + log_dir, log_file = "local_chat_history", "qa_log.csv" + # Ensure log directory exists, create if not + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Construct the full file path + log_path = os.path.join(log_dir, log_file) + + # Check if file exists, if not create and write headers + if not os.path.isfile(log_path): + with open(log_path, mode='w', newline='', encoding='utf-8') as file: + writer = csv.writer(file) + writer.writerow(["timestamp", "question", "answer"]) + + # Append the log entry + with open(log_path, mode='a', newline='', encoding='utf-8') as file: writer = csv.writer(file) - if write_header: - writer.writerow(["Timestamp", "Question", "Answer"]) - writer.writerow([datetime.now(), query, answer]) \ No newline at end of file + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + writer.writerow([timestamp, question, answer]) \ No newline at end of file From 8d0fe21541d61530024ccc620236c956ba7cf7d3 Mon Sep 17 00:00:00 2001 From: kevin Date: Sun, 22 Oct 2023 20:43:46 +0100 Subject: [PATCH 5/6] Added feature to save Q&A to CSV based on user 
flag --- load_models.py | 10 +++++----- local_chat_history/qa_log.csv | 4 ++++ qa_log.csv | 2 ++ 3 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 local_chat_history/qa_log.csv create mode 100644 qa_log.csv diff --git a/load_models.py b/load_models.py index b7fa5a37..b9eb909f 100644 --- a/load_models.py +++ b/load_models.py @@ -141,11 +141,11 @@ def load_full_model(model_id, model_basename, device_type, logging): torch_dtype=torch.float16, low_cpu_mem_usage=True, cache_dir=MODELS_PATH, - # trust_remote_code=True, # set these if you are using NVIDIA GPU - # load_in_4bit=True, - # bnb_4bit_quant_type="nf4", - # bnb_4bit_compute_dtype=torch.float16, - # max_memory={0: "15GB"} # Uncomment this line with you encounter CUDA out of memory errors + trust_remote_code=True, # set these if you are using NVIDIA GPU + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.float16, + max_memory={0: "15GB"} # Uncomment this line when you encounter CUDA out of memory errors ) model.tie_weights() return model, tokenizer diff --git a/local_chat_history/qa_log.csv b/local_chat_history/qa_log.csv new file mode 100644 index 00000000..6850fd9c --- /dev/null +++ b/local_chat_history/qa_log.csv @@ -0,0 +1,4 @@ +timestamp,question,answer +2023-10-22 20:18:47,Tell me in 20 words what is the most important take away from the document, Steven feels ambivalent about selling due to concerns about cultural changes and financial benefits. +2023-10-22 20:20:52,tell me more in 10 words," Of course! I'm here to help you with your question. Please provide the task or question you'd like me to answer, and I will do my best to assist you with a detailed and long response." +2023-10-22 20:22:11,What is the text about in 20 words, The text celebrates women's contributions and promotes inclusivity. 
diff --git a/qa_log.csv b/qa_log.csv new file mode 100644 index 00000000..6cde4bfb --- /dev/null +++ b/qa_log.csv @@ -0,0 +1,2 @@ +Timestamp,Question,Answer +2023-10-22 19:56:06.648611,Make a maximun 30 words summary about what the document is about," Summary: The document discusses the development and evaluation of an AI model called Orca, which is designed to generate human-like text based on given prompts and instructions. Orca is able to bridge the gap between OpenAI's Text-da-Vinci-003 model and ChatGPT in various professional and academic exams, demonstrating its effectiveness in generating accurate and informative responses." From 5d3b1a776995deb9d14f863d5f849227e3978cba Mon Sep 17 00:00:00 2001 From: kevin Date: Sun, 22 Oct 2023 20:48:19 +0100 Subject: [PATCH 6/6] Remove .csv files from repository --- local_chat_history/qa_log.csv | 4 ---- qa_log.csv | 2 -- 2 files changed, 6 deletions(-) delete mode 100644 local_chat_history/qa_log.csv delete mode 100644 qa_log.csv diff --git a/local_chat_history/qa_log.csv b/local_chat_history/qa_log.csv deleted file mode 100644 index 6850fd9c..00000000 --- a/local_chat_history/qa_log.csv +++ /dev/null @@ -1,4 +0,0 @@ -timestamp,question,answer -2023-10-22 20:18:47,Tell me in 20 words what is the most important take away from the document, Steven feels ambivalent about selling due to concerns about cultural changes and financial benefits. -2023-10-22 20:20:52,tell me more in 10 words," Of course! I'm here to help you with your question. Please provide the task or question you'd like me to answer, and I will do my best to assist you with a detailed and long response." -2023-10-22 20:22:11,What is the text about in 20 words, The text celebrates women's contributions and promotes inclusivity. 
diff --git a/qa_log.csv b/qa_log.csv deleted file mode 100644 index 6cde4bfb..00000000 --- a/qa_log.csv +++ /dev/null @@ -1,2 +0,0 @@ -Timestamp,Question,Answer -2023-10-22 19:56:06.648611,Make a maximun 30 words summary about what the document is about," Summary: The document discusses the development and evaluation of an AI model called Orca, which is designed to generate human-like text based on given prompts and instructions. Orca is able to bridge the gap between OpenAI's Text-da-Vinci-003 model and ChatGPT in various professional and academic exams, demonstrating its effectiveness in generating accurate and informative responses."