
Fix tools description (fix incorrect descriptions in tools) #3596

Open
wants to merge 35 commits into base: dev
Changes from all commits
Commits (35)
e042a76
Add files via upload
imClumsyPanda Feb 1, 2024
4f67f4a
Update README.md
imClumsyPanda Feb 1, 2024
ab65025
Merge pull request #2946 from chatchat-space/dev
zRzRzRzRzRzRzR Feb 6, 2024
0522a42
Add files via upload
imClumsyPanda Feb 6, 2024
f75ae3e
Update README.md
imClumsyPanda Feb 6, 2024
cd13aee
Merge pull request #2958 from chatchat-space/dev
zRzRzRzRzRzRzR Feb 6, 2024
051c599
requirements_webui.txt (#2960)
wusongbai139 Feb 7, 2024
d5edb08
Add files via upload
imClumsyPanda Feb 19, 2024
b2fabe2
Update README.md
imClumsyPanda Feb 19, 2024
1fa714e
Update README.md
imClumsyPanda Feb 19, 2024
7c89f88
Add files via upload
imClumsyPanda Feb 21, 2024
fb8fbe9
Update README.md
imClumsyPanda Feb 21, 2024
cf1c853
Add files via upload
imClumsyPanda Feb 24, 2024
574ea4f
Update README.md
imClumsyPanda Feb 24, 2024
76e82a9
Add files via upload
imClumsyPanda Feb 28, 2024
caf502d
Update README.md
imClumsyPanda Feb 28, 2024
54bed50
Add files via upload
imClumsyPanda Mar 2, 2024
bab9369
Update README.md
imClumsyPanda Mar 2, 2024
e48f4a2
Fix error when uploading knowledge base files with the Milvus database (#3155)
hollowdjj Mar 6, 2024
f40d415
Add files via upload
imClumsyPanda Mar 6, 2024
021d635
Update README.md
imClumsyPanda Mar 6, 2024
cd54eeb
Add files via upload
imClumsyPanda Mar 12, 2024
d879e86
Update README.md
imClumsyPanda Mar 12, 2024
5ad4d52
Add files via upload
imClumsyPanda Mar 19, 2024
d78b147
Update README.md
imClumsyPanda Mar 19, 2024
07b7c96
Dev fix csv loader (#3404)
oslijunw Mar 20, 2024
253d4b9
Fix the bug where get_doc_by_ids raised an error with pgvector, and the bug where vectors remained after knowledge base files were deleted (#3407)
liudichen Mar 20, 2024
825e497
Add files via upload
imClumsyPanda Mar 22, 2024
40263dd
Update README.md
imClumsyPanda Mar 22, 2024
bfb76fa
Add files via upload
imClumsyPanda Mar 26, 2024
966c582
Update README.md
imClumsyPanda Mar 26, 2024
78290d3
Update README.md
imClumsyPanda Mar 29, 2024
ae9f4d2
Add files via upload
imClumsyPanda Mar 29, 2024
4872352
Update README.md (#3585)
RoceoRobin Apr 1, 2024
665d007
Fix incorrect descriptions in tools
rongchenlin Apr 1, 2024
README.md (4 changes: 2 additions & 2 deletions)

@@ -10,7 +10,7 @@
 ### ⚠️ 重要提示
 
 `0.2.10`将会是`0.2.x`系列的最后一个版本,`0.2.x`系列版本将会停止更新和技术支持,全力研发具有更强应用性的 `Langchain-Chatchat 0.3.x`。
-`0.2.10` 的后续 bug 修复将会直接推送到`master`分支,而不在进行版本更新
+`0.2.10` 的后续 bug 修复将会直接推送到`master`分支,而不再进行版本更新
 
 ---

@@ -193,7 +193,7 @@ $ python startup.py -a
 [![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white "langchain-chatglm")](https://t.me/+RjliQ3jnJ1YyN2E9)
 
 ### 项目交流群
-<img src="img/qr_code_90.jpg" alt="二维码" width="300" />
+<img src="img/qr_code_100.jpg" alt="二维码" width="300" />
 
 🎉 Langchain-Chatchat 项目微信交流群,如果你也对本项目感兴趣,欢迎加入群聊参与讨论交流。
document_loaders/FilteredCSVloader.py (35 changes: 19 additions & 16 deletions)

@@ -59,23 +59,26 @@ def __read_file(self, csvfile: TextIOWrapper) -> List[Document]:
         docs = []
         csv_reader = csv.DictReader(csvfile, **self.csv_args)  # type: ignore
         for i, row in enumerate(csv_reader):
-            if self.columns_to_read[0] in row:
-                content = row[self.columns_to_read[0]]
-                # Extract the source if available
-                source = (
-                    row.get(self.source_column, None)
-                    if self.source_column is not None
-                    else self.file_path
-                )
-                metadata = {"source": source, "row": i}
+            content = []
+            for col in self.columns_to_read:
+                if col in row:
+                    content.append(f'{col}:{str(row[col])}')
+                else:
+                    raise ValueError(f"Column '{self.columns_to_read[0]}' not found in CSV file.")
+            content = '\n'.join(content)
+            # Extract the source if available
+            source = (
+                row.get(self.source_column, None)
+                if self.source_column is not None
+                else self.file_path
+            )
+            metadata = {"source": source, "row": i}
 
-                for col in self.metadata_columns:
-                    if col in row:
-                        metadata[col] = row[col]
+            for col in self.metadata_columns:
+                if col in row:
+                    metadata[col] = row[col]
 
-                doc = Document(page_content=content, metadata=metadata)
-                docs.append(doc)
-            else:
-                raise ValueError(f"Column '{self.columns_to_read[0]}' not found in CSV file.")
+            doc = Document(page_content=content, metadata=metadata)
+            docs.append(doc)
 
         return docs
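For reference, here is a minimal standalone sketch of the per-row formatting the updated loader produces (the sample CSV, column names, and direct use of csv.DictReader are illustrative assumptions, not part of the PR):

```python
import csv
from io import StringIO

# Hypothetical two-column CSV; columns_to_read lists the columns folded into page_content.
sample = StringIO("question,answer\nWhat is RAG?,Retrieval-augmented generation\n")
columns_to_read = ["question", "answer"]

for i, row in enumerate(csv.DictReader(sample)):
    # Mirrors the updated loader: every requested column is rendered as "col:value"
    # and the pieces are joined with newlines to form the document's page_content.
    # (The PR additionally raises ValueError if a requested column is missing.)
    parts = [f"{col}:{row[col]}" for col in columns_to_read]
    page_content = "\n".join(parts)
    print(page_content)
    # question:What is RAG?
    # answer:Retrieval-augmented generation
```

The previous implementation only read the first entry of columns_to_read; the new loop concatenates every configured column, so multi-column CSV rows are no longer truncated.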
Binary file added img/qr_code_100.jpg
Binary file added img/qr_code_90.jpg
Binary file added img/qr_code_91.jpg
Binary file added img/qr_code_92.jpg
Binary file added img/qr_code_93.jpg
Binary file added img/qr_code_94.jpg
Binary file added img/qr_code_95.jpg
Binary file added img/qr_code_96.jpg
Binary file added img/qr_code_97.jpg
Binary file added img/qr_code_98.jpg
Binary file added img/qr_code_99.jpg
Binary file added img/qrcode_90_2.jpg
requirements_webui.txt (2 changes: 1 addition & 1 deletion)

@@ -6,4 +6,4 @@ streamlit-modal==0.1.0
 streamlit-aggrid==0.3.4.post3
 httpx==0.26.0
 httpx_sse==0.4.0
-watchdog=s=3.0.0
+watchdog==3.0.0
server/agent/tools_select.py (4 changes: 2 additions & 2 deletions)

@@ -29,7 +29,7 @@
     Tool.from_function(
         func=search_knowledgebase_complex,
         name="search_knowledgebase_complex",
-        description="Use Use this tool to search local knowledgebase and get information",
+        description="Use this tool to search local knowledgebase and get information",
         args_schema=KnowledgeSearchInput,
     ),
     Tool.from_function(

@@ -47,7 +47,7 @@
     Tool.from_function(
         func=search_youtube,
         name="search_youtube",
-        description="use this tools to search youtube videos",
+        description="use this tool to search youtube videos",
         args_schema=YoutubeInput,
     ),
 ]
server/knowledge_base/kb_doc_api.py (3 changes: 3 additions & 0 deletions)

@@ -39,6 +39,9 @@ def search_docs(
         data = [DocumentWithVSId(**x[0].dict(), score=x[1], id=x[0].metadata.get("id")) for x in docs]
     elif file_name or metadata:
         data = kb.list_docs(file_name=file_name, metadata=metadata)
+        for d in data:
+            if "vector" in d.metadata:
+                del d.metadata["vector"]
     return data
server/knowledge_base/kb_service/base.py (14 changes: 14 additions & 0 deletions)

@@ -222,6 +222,20 @@ def list_docs(self, file_name: str = None, metadata: Dict = {}) -> List[Document
                 pass
         return docs
 
+    def get_relative_source_path(self,filepath: str):
+        '''
+        将文件路径转化为相对路径,保证查询时一致
+        '''
+        relative_path = filepath
+        if os.path.isabs(relative_path):
+            try:
+                relative_path = Path(filepath).relative_to(self.doc_path)
+            except Exception as e:
+                print(f"cannot convert absolute path ({source}) to relative path. error is : {e}")
+
+        relative_path = str(relative_path.as_posix().strip("/"))
+        return relative_path
+
     @abstractmethod
     def do_create_kb(self):
         """
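A hedged, standalone re-implementation of what the new helper is meant to do, useful for seeing the behavior in isolation (DOC_PATH and the sample path are made up; the PR version uses self.doc_path, a broad except clause, and a print on failure):

```python
import os
from pathlib import Path

DOC_PATH = "/data/knowledge_base/samples/content"  # hypothetical knowledge-base doc_path


def get_relative_source_path(filepath: str, doc_path: str = DOC_PATH) -> str:
    """Convert an absolute file path to the POSIX-style relative path used as the metadata 'source'."""
    relative_path = filepath
    if os.path.isabs(relative_path):
        try:
            relative_path = Path(filepath).relative_to(doc_path)
        except ValueError:
            # Path lies outside doc_path; fall back to the original value.
            relative_path = filepath
    return Path(relative_path).as_posix().strip("/")


print(get_relative_source_path("/data/knowledge_base/samples/content/test_files/faq.md"))
# -> test_files/faq.md
```

The normalization appears to exist so that the ES and pgvector delete queries below match the exact relative source string stored in metadata at index time, even when the caller passes an absolute path.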
server/knowledge_base/kb_service/es_kb_service.py (2 changes: 1 addition & 1 deletion)

@@ -184,7 +184,7 @@ def do_delete_doc(self, kb_file, **kwargs):
         query = {
             "query": {
                 "term": {
-                    "metadata.source.keyword": kb_file.filepath
+                    "metadata.source.keyword": self.get_relative_source_path(kb_file.filepath)
                 }
             }
         }
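For context, a sketch of how a term query like this is typically executed against Elasticsearch with an elasticsearch-py 7.x-style client; the client construction, index name, and the delete_by_query call are assumptions about the surrounding code (this hunk only shows the query body):

```python
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # assumed connection settings
index_name = "samples"                       # hypothetical knowledge-base index name

# Deleting by the relative source path only removes documents whose metadata.source
# was stored as that same relative string when they were indexed.
query = {
    "query": {
        "term": {
            "metadata.source.keyword": "test_files/faq.md"  # example relative path
        }
    }
}
es.delete_by_query(index=index_name, body=query)
```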
server/knowledge_base/kb_service/pg_kb_service.py (7 changes: 3 additions & 4 deletions)

@@ -28,9 +28,9 @@ def _load_pg_vector(self):
 
     def get_doc_by_ids(self, ids: List[str]) -> List[Document]:
         with Session(PGKBService.engine) as session:
-            stmt = text("SELECT document, cmetadata FROM langchain_pg_embedding WHERE collection_id in :ids")
+            stmt = text("SELECT document, cmetadata FROM langchain_pg_embedding WHERE custom_id = ANY(:ids)")
             results = [Document(page_content=row[0], metadata=row[1]) for row in
-                       session.execute(stmt, {'ids': ids}).fetchall()]
+                       session.execute(stmt, {'ids': ids}).fetchall()]
         return results
     def del_doc_by_ids(self, ids: List[str]) -> bool:
         return super().del_doc_by_ids(ids)

@@ -71,11 +71,10 @@ def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]:
 
     def do_delete_doc(self, kb_file: KnowledgeFile, **kwargs):
         with Session(PGKBService.engine) as session:
-            filepath = kb_file.filepath.replace('\\', '\\\\')
             session.execute(
                 text(
                     ''' DELETE FROM langchain_pg_embedding WHERE cmetadata::jsonb @> '{"source": "filepath"}'::jsonb;'''.replace(
-                        "filepath", filepath)))
+                        "filepath", self.get_relative_source_path(kb_file.filepath))))
             session.commit()
 
     def do_clear_vs(self):
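Finally, a minimal sketch of the corrected lookup pattern from get_doc_by_ids (the connection string and sample ids are placeholders; the table and column names come from the diff above):

```python
from sqlalchemy import create_engine, text

# Placeholder DSN; langchain's PGVector store keeps chunk rows in langchain_pg_embedding.
engine = create_engine("postgresql+psycopg2://user:password@localhost:5432/chatchat")

ids = ["doc-id-1", "doc-id-2"]  # example custom_id values

with engine.connect() as conn:
    # psycopg2 adapts the Python list to a PostgreSQL array, so "= ANY(:ids)" expands
    # correctly; the old "collection_id in :ids" filtered on the collection id rather
    # than the per-chunk id, which appears to be why lookups by document id failed.
    stmt = text("SELECT document, cmetadata FROM langchain_pg_embedding WHERE custom_id = ANY(:ids)")
    rows = conn.execute(stmt, {"ids": ids}).fetchall()
```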