microsoft · Hk669 · Apr 19, 2024 · Apr 19, 2024 · Apr 24, 2024 · Apr 29, 2024
diff --git a/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py
@@ -103,7 +103,7 @@ def __init__(
  self._hnsw_config = self._retrieve_config.get("hnsw_config", None)
  self._payload_indexing = self._retrieve_config.get("payload_indexing", False)
 
- def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = ""):
+ def retrieve_docs(self, problem: str, n_results: Optional[int] = 20, search_string: Optional[str] = ""):
  """
  Args:
  problem (str): the problem to be solved.
@@ -150,21 +150,21 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str =
 
 def create_qdrant_from_dir(
  dir_path: str,
- max_tokens: int = 4000,
- client: QdrantClient = None,
- collection_name: str = "all-my-documents",
- chunk_mode: str = "multi_lines",
- must_break_at_empty_line: bool = True,
- embedding_model: str = "BAAI/bge-small-en-v1.5",
- custom_text_split_function: Callable = None,
- custom_text_types: List[str] = TEXT_FORMATS,
- recursive: bool = True,
- extra_docs: bool = False,
- parallel: int = 0,
- on_disk: bool = False,
+ max_tokens: Optional[int] = 4000,
+ client: Optional[QdrantClient] = None,
+ collection_name: Optional[str] = "all-my-documents",
+ chunk_mode: Optional[str] = "multi_lines",
+ must_break_at_empty_line: Optional[bool] = True,
+ embedding_model: Optional[str] = "BAAI/bge-small-en-v1.5",
+ custom_text_split_function: Optional[Callable] = None,
+ custom_text_types: Optional[List[str]] = TEXT_FORMATS,
+ recursive: Optional[bool] = True,
+ extra_docs: Optional[bool] = False,
+ parallel: Optional[int] = 0,
+ on_disk: Optional[bool] = False,
  quantization_config: Optional[models.QuantizationConfig] = None,
  hnsw_config: Optional[models.HnswConfigDiff] = None,
- payload_indexing: bool = False,
+ payload_indexing: Optional[bool] = False,
  qdrant_client_options: Optional[Dict] = {},
 ):
  """Create a Qdrant collection from all the files in a given directory, the directory can also be a single file or a
@@ -255,11 +255,11 @@ def create_qdrant_from_dir(
 
 def query_qdrant(
  query_texts: List[str],
- n_results: int = 10,
- client: QdrantClient = None,
- collection_name: str = "all-my-documents",
- search_string: str = "",
- embedding_model: str = "BAAI/bge-small-en-v1.5",
+ n_results: Optional[int] = 10,
+ client: Optional[QdrantClient] = None,
+ collection_name: Optional[str] = "all-my-documents",
+ search_string: Optional[str] = "",
+ embedding_model: Optional[str] = "BAAI/bge-small-en-v1.5",
  qdrant_client_options: Optional[Dict] = {},
 ) -> List[List[QueryResponse]]:
  """Perform a similarity search with filters on a Qdrant collection
@@ -304,10 +304,18 @@ class QueryResponse(BaseModel, extra="forbid"): # type: ignore
  ),
  )
 
- data = {
- "ids": [[result.id for result in sublist] for sublist in results],
- "documents": [[result.document for result in sublist] for sublist in results],
- "distances": [[result.score for result in sublist] for sublist in results],
- "metadatas": [[result.metadata for result in sublist] for sublist in results],
- }
+ data = [
+ [
+ QueryResponse(
+ id=result.id,
+ embedding=result.embedding,
+ metadata=result.metadata,
+ document=result.document,
+ score=result.score,
+ )
+ for result in sublist
+ ]
+ for sublist in results
+ ]
+
  return data
diff --git a/autogen/math_utils.py b/autogen/math_utils.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Dict, Optional, Tuple, Union
 
 from autogen import DEFAULT_MODEL, oai
 
@@ -9,7 +9,7 @@
 }
 
 
-def solve_problem(problem: str, **config) -> str:
+def solve_problem(problem: str, **config) -> Tuple[Optional[str], int]:
  """(openai<1) Solve the math problem.
 
  Args:
@@ -25,7 +25,7 @@ def solve_problem(problem: str, **config) -> str:
  return results.get("voted_answer"), response["cost"]
 
 
-def remove_boxed(string: str) -> Optional[str]:
+def remove_boxed(string: str) -> Union[str, None]:
  """Source: https://github.com/hendrycks/math
  Extract the text within a \\boxed{...} environment.
  Example:
@@ -36,15 +36,15 @@ def remove_boxed(string: str) -> Optional[str]:
  """
  left = "\\boxed{"
  try:
- if not all((string[: len(left)] == left, string[-1] == "}")):
+ if not ((string.startswith(left) and string[-1] == "}")):
  raise AssertionError
 
  return string[len(left) : -1]
  except Exception:
  return None
 
 
-def last_boxed_only_string(string: str) -> Optional[str]:
+def last_boxed_only_string(string: str) -> Union[str, None]:
  """Source: https://github.com/hendrycks/math
  Extract the last \\boxed{...} or \\fbox{...} element from a string.
  """
@@ -96,7 +96,7 @@ def _fix_fracs(string: str) -> str:
  new_str += substr
  else:
  try:
- if not len(substr) >= 2:
+ if len(substr) < 2:
  raise AssertionError
  except Exception:
  return string
@@ -132,7 +132,7 @@ def _fix_a_slash_b(string: str) -> str:
  try:
  a = int(a_str)
  b = int(b_str)
- if not string == "{}/{}".format(a, b):
+ if string != "{}/{}".format(a, b):
  raise AssertionError
  new_string = "\\frac{" + str(a) + "}{" + str(b) + "}"
  return new_string
@@ -147,7 +147,7 @@ def _remove_right_units(string: str) -> str:
  """
  if "\\text{ " in string:
  splits = string.split("\\text{ ")
- if not len(splits) == 2:
+ if len(splits) != 2:
  raise AssertionError
  return splits[0]
  else:
@@ -161,16 +161,17 @@ def _fix_sqrt(string: str) -> str:
  >>> _fix_sqrt("\\sqrt3")
  \\sqrt{3}
  """
- if "\\sqrt" not in string:
+ SQRT_LITERAL = "\\sqrt" # Define a constant for the repeated literal
+ if SQRT_LITERAL not in string:
  return string
- splits = string.split("\\sqrt")
+ splits = string.split(SQRT_LITERAL)
  new_string = splits[0]
  for split in splits[1:]:
  if split[0] != "{":
  a = split[0]
- new_substr = "\\sqrt{" + a + "}" + split[1:]
+ new_substr = SQRT_LITERAL + "{" + a + "}" + split[1:]
  else:
- new_substr = "\\sqrt" + split
+ new_substr = SQRT_LITERAL + split
  new_string += new_substr
  return new_string
 
@@ -310,7 +311,7 @@ def voting_counts(responses):
  return answers
 
 
-def eval_math_responses(responses, solution=None, **args):
+def eval_math_responses(responses, solution=None, **args) -> Dict:
  """Select a response for a math problem using voting, and check if the response is correct if the solution is provided.
 
  Args:

diff --git a/autogen/retrieve_utils.py b/autogen/retrieve_utils.py
@@ -2,7 +2,7 @@
 import hashlib
 import os
 import re
-from typing import Callable, List, Tuple, Union
+from typing import Callable, List, Optional, Tuple, Union
 from urllib.parse import urlparse
 
 import chromadb
@@ -195,7 +195,9 @@ def split_files_to_chunks(
  return chunks, sources
 
 
-def get_files_from_dir(dir_path: Union[str, List[str]], types: list = TEXT_FORMATS, recursive: bool = True):
+def get_files_from_dir(
+ dir_path: Union[str, List[str]], types: Optional[list] = TEXT_FORMATS, recursive: Optional[bool] = True
+):
  """Return a list of all the files in a given directory, a url, a file path or a list of them."""
  if len(types) == 0:
  raise ValueError("types cannot be empty.")
@@ -245,7 +247,7 @@ def get_files_from_dir(dir_path: Union[str, List[str]], types: list = TEXT_FORMA
  return files
 
 
-def parse_html_to_markdown(html: str, url: str = None) -> str:
+def parse_html_to_markdown(html: str, url: Optional[str] = None) -> str:
  """Parse HTML to markdown."""
  soup = BeautifulSoup(html, "html.parser")
  title = soup.title.string
@@ -278,14 +280,16 @@ def parse_html_to_markdown(html: str, url: str = None) -> str:
 
 def _generate_file_name_from_url(url: str, max_length=255) -> str:
  url_bytes = url.encode("utf-8")
- hash = hashlib.blake2b(url_bytes).hexdigest()
+ url_hash = hashlib.blake2b(url_bytes).hexdigest()
  parsed_url = urlparse(url)
  file_name = os.path.basename(url)
- file_name = f"{parsed_url.netloc}_{file_name}_{hash[:min(8, max_length-len(parsed_url.netloc)-len(file_name)-1)]}"
+ file_name = (
+ f"{parsed_url.netloc}_{file_name}_{url_hash[:min(8, max_length-len(parsed_url.netloc)-len(file_name)-1)]}"
+ )
  return file_name
 
 
-def get_file_from_url(url: str, save_path: str = None) -> Tuple[str, str]:
+def get_file_from_url(url: str, save_path: str = None) -> Union[Tuple[str, str], None]:
  """Download a file from a URL."""
  if save_path is None:
  save_path = "tmp/chromadb"
@@ -333,19 +337,19 @@ def is_url(string: str):
 
 def create_vector_db_from_dir(
  dir_path: Union[str, List[str]],
- max_tokens: int = 4000,
- client: API = None,
- db_path: str = "tmp/chromadb.db",
- collection_name: str = "all-my-documents",
- get_or_create: bool = False,
- chunk_mode: str = "multi_lines",
- must_break_at_empty_line: bool = True,
- embedding_model: str = "all-MiniLM-L6-v2",
- embedding_function: Callable = None,
- custom_text_split_function: Callable = None,
- custom_text_types: List[str] = TEXT_FORMATS,
- recursive: bool = True,
- extra_docs: bool = False,
+ max_tokens: Optional[int] = 4000,
+ client: Optional[API] = None,
+ db_path: Optional[str] = "tmp/chromadb.db",
+ collection_name: Optional[str] = "all-my-documents",
+ get_or_create: Optional[bool] = False,
+ chunk_mode: Optional[str] = "multi_lines",
+ must_break_at_empty_line: Optional[bool] = True,
+ embedding_model: Optional[str] = "all-MiniLM-L6-v2",
+ embedding_function: Optional[Callable] = None,
+ custom_text_split_function: Optional[Callable] = None,
+ custom_text_types: Optional[List[str]] = TEXT_FORMATS,
+ recursive: Optional[bool] = True,
+ extra_docs: Optional[bool] = False,
 ) -> API:
  """Create a vector db from all the files in a given directory, the directory can also be a single file or a url to
  a single file. We support chromadb compatible APIs to create the vector db, this function is not required if
@@ -426,13 +430,13 @@ def create_vector_db_from_dir(
 
 def query_vector_db(
  query_texts: List[str],
- n_results: int = 10,
- client: API = None,
- db_path: str = "tmp/chromadb.db",
- collection_name: str = "all-my-documents",
- search_string: str = "",
- embedding_model: str = "all-MiniLM-L6-v2",
- embedding_function: Callable = None,
+ n_results: Optional[int] = 10,
+ client: Optional[API] = None,
+ db_path: Optional[str] = "tmp/chromadb.db",
+ collection_name: Optional[str] = "all-my-documents",
+ search_string: Optional[str] = "",
+ embedding_model: Optional[str] = "all-MiniLM-L6-v2",
+ embedding_function: Optional[Callable] = None,
 ) -> QueryResult:
  """Query a vector db. We support chromadb compatible APIs, it's not required if you prepared your own vector db
  and query function.

diff --git a/autogen/runtime_logging.py b/autogen/runtime_logging.py
@@ -31,8 +31,7 @@ def start(logger_type: str = "sqlite", config: Optional[Dict[str, Any]] = None)
  is_logging = True
  except Exception as e:
  logger.error(f"[runtime logging] Failed to start logging: {e}")
- finally:
- return session_id
+ return session_id
 
 
 def log_chat_completion(