diff --git a/agenthub/monologue_agent/agent.py b/agenthub/monologue_agent/agent.py
index 817dd638989..a9451b8c0f4 100644
--- a/agenthub/monologue_agent/agent.py
+++ b/agenthub/monologue_agent/agent.py
@@ -32,7 +32,7 @@
 if config.get(ConfigType.AGENT_MEMORY_ENABLED):
     from agenthub.monologue_agent.utils.memory import LongTermMemory
 
-MAX_MONOLOGUE_LENGTH = 20000
+MAX_TOKEN_COUNT_PADDING = 512
 MAX_OUTPUT_LENGTH = 5000
 
 INITIAL_THOUGHTS = [
@@ -128,7 +128,17 @@ def _add_event(self, event: dict):
         self.monologue.add_event(event)
         if self.memory is not None:
             self.memory.add_event(event)
-        if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
+
+        # Test monologue token length
+        prompt = prompts.get_request_action_prompt(
+            '',
+            self.monologue.get_thoughts(),
+            [],
+        )
+        messages = [{'content': prompt, 'role': 'user'}]
+        token_count = self.llm.get_token_count(messages)
+
+        if token_count + MAX_TOKEN_COUNT_PADDING > self.llm.max_input_tokens:
             self.monologue.condense(self.llm)
 
     def _initialize(self, task: str):
diff --git a/opendevin/core/config.py b/opendevin/core/config.py
index 5a4bb8ea693..6d8c45124e0 100644
--- a/opendevin/core/config.py
+++ b/opendevin/core/config.py
@@ -22,6 +22,7 @@
 DEFAULT_CONFIG: dict = {
     ConfigType.LLM_API_KEY: None,
     ConfigType.LLM_BASE_URL: None,
+    ConfigType.LLM_CUSTOM_LLM_PROVIDER: None,
     ConfigType.WORKSPACE_BASE: os.getcwd(),
     ConfigType.WORKSPACE_MOUNT_PATH: None,
     ConfigType.WORKSPACE_MOUNT_PATH_IN_SANDBOX: '/workspace',
@@ -38,10 +39,11 @@
     ConfigType.LLM_RETRY_MIN_WAIT: 3,
     ConfigType.LLM_RETRY_MAX_WAIT: 60,
     ConfigType.MAX_ITERATIONS: 100,
+    ConfigType.LLM_MAX_INPUT_TOKENS: None,
+    ConfigType.LLM_MAX_OUTPUT_TOKENS: None,
     ConfigType.AGENT_MEMORY_MAX_THREADS: 2,
     ConfigType.AGENT_MEMORY_ENABLED: False,
     ConfigType.LLM_TIMEOUT: None,
-    ConfigType.LLM_MAX_RETURN_TOKENS: None,
     ConfigType.LLM_TEMPERATURE: None,
     ConfigType.LLM_TOP_P: None,
     # GPT-4 pricing is $10 per 1M input tokens. Since tokenization happens on LLM side,
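
The agent.py hunk above swaps the monologue's character-length check for a token-based one: it builds the prompt that would actually be sent, counts its tokens through the LLM wrapper, and condenses once the count plus a 512-token padding crosses the model's input limit. A minimal standalone sketch of that trigger, where should_condense and build_prompt are hypothetical names standing in for the agent's inlined logic and prompts.get_request_action_prompt:

MAX_TOKEN_COUNT_PADDING = 512  # safety margin kept below the model's input limit


def should_condense(llm, thoughts, build_prompt) -> bool:
    # Build the same prompt the agent would send for its next action request.
    prompt = build_prompt('', thoughts, [])
    messages = [{'content': prompt, 'role': 'user'}]
    # Count tokens via the LLM wrapper (litellm.token_counter under the hood)
    # and condense before the padded count exceeds max_input_tokens.
    return llm.get_token_count(messages) + MAX_TOKEN_COUNT_PADDING > llm.max_input_tokens
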
diff --git a/opendevin/core/schema/config.py b/opendevin/core/schema/config.py
index ccde824df11..b99d98b938a 100644
--- a/opendevin/core/schema/config.py
+++ b/opendevin/core/schema/config.py
@@ -2,9 +2,11 @@
 
 
 class ConfigType(str, Enum):
+    LLM_CUSTOM_LLM_PROVIDER = 'LLM_CUSTOM_LLM_PROVIDER'
+    LLM_MAX_INPUT_TOKENS = 'LLM_MAX_INPUT_TOKENS'
+    LLM_MAX_OUTPUT_TOKENS = 'LLM_MAX_OUTPUT_TOKENS'
     LLM_TOP_P = 'LLM_TOP_P'
     LLM_TEMPERATURE = 'LLM_TEMPERATURE'
-    LLM_MAX_RETURN_TOKENS = 'LLM_MAX_RETURN_TOKENS'
     LLM_TIMEOUT = 'LLM_TIMEOUT'
     LLM_API_KEY = 'LLM_API_KEY'
     LLM_BASE_URL = 'LLM_BASE_URL'
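
The llm.py changes below resolve the two limits from litellm's model metadata whenever the config leaves LLM_MAX_INPUT_TOKENS or LLM_MAX_OUTPUT_TOKENS unset. A rough standalone sketch of that fallback, assuming only that litellm is installed; resolve_token_limits is a hypothetical helper and the 4096/1024 defaults mirror the diff:

import litellm


def resolve_token_limits(model_name, max_input_tokens=None, max_output_tokens=None):
    try:
        # litellm raises (a base Exception) for models it has no metadata for.
        model_info = litellm.get_model_info(model_name)
    except Exception:
        model_info = None

    if max_input_tokens is None:
        # Fall back to a gpt-3.5 sized input window when nothing better is known.
        max_input_tokens = (model_info or {}).get('max_input_tokens') or 4096
    if max_output_tokens is None:
        # Keep replies bounded even when no metadata is available.
        max_output_tokens = (model_info or {}).get('max_output_tokens') or 1024
    return max_input_tokens, max_output_tokens
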
diff --git a/opendevin/llm/llm.py b/opendevin/llm/llm.py
index f9ce38dc19c..e3f9fe39ffa 100644
--- a/opendevin/llm/llm.py
+++ b/opendevin/llm/llm.py
@@ -1,5 +1,6 @@
 from functools import partial
 
+import litellm
 from litellm import completion as litellm_completion
 from litellm.exceptions import (
     APIConnectionError,
@@ -25,8 +26,10 @@
 LLM_NUM_RETRIES = config.get(ConfigType.LLM_NUM_RETRIES)
 LLM_RETRY_MIN_WAIT = config.get(ConfigType.LLM_RETRY_MIN_WAIT)
 LLM_RETRY_MAX_WAIT = config.get(ConfigType.LLM_RETRY_MAX_WAIT)
+LLM_MAX_INPUT_TOKENS = config.get(ConfigType.LLM_MAX_INPUT_TOKENS)
+LLM_MAX_OUTPUT_TOKENS = config.get(ConfigType.LLM_MAX_OUTPUT_TOKENS)
+LLM_CUSTOM_LLM_PROVIDER = config.get(ConfigType.LLM_CUSTOM_LLM_PROVIDER)
 LLM_TIMEOUT = config.get(ConfigType.LLM_TIMEOUT)
-LLM_MAX_RETURN_TOKENS = config.get(ConfigType.LLM_MAX_RETURN_TOKENS)
 LLM_TEMPERATURE = config.get(ConfigType.LLM_TEMPERATURE)
 LLM_TOP_P = config.get(ConfigType.LLM_TOP_P)
 
@@ -45,8 +48,10 @@ def __init__(
         num_retries=LLM_NUM_RETRIES,
         retry_min_wait=LLM_RETRY_MIN_WAIT,
         retry_max_wait=LLM_RETRY_MAX_WAIT,
+        max_input_tokens=LLM_MAX_INPUT_TOKENS,
+        max_output_tokens=LLM_MAX_OUTPUT_TOKENS,
+        custom_llm_provider=LLM_CUSTOM_LLM_PROVIDER,
         llm_timeout=LLM_TIMEOUT,
-        llm_max_return_tokens=LLM_MAX_RETURN_TOKENS,
         llm_temperature=LLM_TEMPERATURE,
         llm_top_p=LLM_TOP_P,
     ):
@@ -59,8 +64,10 @@ def __init__(
             num_retries (int, optional): The number of retries for API calls. Defaults to LLM_NUM_RETRIES.
             retry_min_wait (int, optional): The minimum time to wait between retries in seconds. Defaults to LLM_RETRY_MIN_TIME.
             retry_max_wait (int, optional): The maximum time to wait between retries in seconds. Defaults to LLM_RETRY_MAX_TIME.
+            max_input_tokens (int, optional): The maximum number of tokens to send to the LLM per task. Defaults to LLM_MAX_INPUT_TOKENS.
+            max_output_tokens (int, optional): The maximum number of tokens to receive from the LLM per task. Defaults to LLM_MAX_OUTPUT_TOKENS.
+            custom_llm_provider (str, optional): A custom LLM provider. Defaults to LLM_CUSTOM_LLM_PROVIDER.
             llm_timeout (int, optional): The maximum time to wait for a response in seconds. Defaults to LLM_TIMEOUT.
-            llm_max_return_tokens (int, optional): The maximum number of tokens to return. Defaults to LLM_MAX_RETURN_TOKENS.
 
         Attributes:
             model_name (str): The name of the language model.
@@ -73,8 +80,33 @@ def __init__(
         self.api_key = api_key
         self.base_url = base_url
         self.api_version = api_version
+        self.max_input_tokens = max_input_tokens
+        self.max_output_tokens = max_output_tokens
         self.llm_timeout = llm_timeout
-        self.llm_max_return_tokens = llm_max_return_tokens
+        self.custom_llm_provider = custom_llm_provider
+
+        # litellm actually uses base Exception here for unknown model
+        self.model_info = None
+        try:
+            self.model_info = litellm.get_model_info(self.model_name)
+        # noinspection PyBroadException
+        except Exception:
+            logger.warning(f'Could not get model info for {self.model_name}')
+
+        if self.max_input_tokens is None:
+            if self.model_info is not None and 'max_input_tokens' in self.model_info:
+                self.max_input_tokens = self.model_info['max_input_tokens']
+            else:
+                # Max input tokens for gpt3.5, so this is a safe fallback for any potentially viable model
+                self.max_input_tokens = 4096
+
+        if self.max_output_tokens is None:
+            if self.model_info is not None and 'max_output_tokens' in self.model_info:
+                self.max_output_tokens = self.model_info['max_output_tokens']
+            else:
+                # Enough tokens for most output actions, and not too many for a bad llm to get carried away responding
+                # with thousands of unwanted tokens
+                self.max_output_tokens = 1024
 
         self._completion = partial(
             litellm_completion,
@@ -82,7 +114,8 @@ def __init__(
             api_key=self.api_key,
             base_url=self.base_url,
             api_version=self.api_version,
-            max_tokens=self.llm_max_return_tokens,
+            custom_llm_provider=custom_llm_provider,
+            max_tokens=self.max_output_tokens,
             timeout=self.llm_timeout,
             temperature=llm_temperature,
             top_p=llm_top_p,
@@ -129,6 +162,18 @@ def completion(self):
         """
         return self._completion
 
+    def get_token_count(self, messages):
+        """
+        Get the number of tokens in a list of messages.
+
+        Args:
+            messages (list): A list of messages.
+
+        Returns:
+            int: The number of tokens.
+        """
+        return litellm.token_counter(model=self.model_name, messages=messages)
+
     def __str__(self):
         if self.api_version:
             return f'LLM(model={self.model_name}, api_version={self.api_version}, base_url={self.base_url})'
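
For completeness, a usage sketch of the new attributes and helper; it assumes the constructor still takes the model name as its model argument, and gpt-3.5-turbo is purely an example:

from opendevin.llm.llm import LLM

llm = LLM(model='gpt-3.5-turbo')

# Limits come from config, litellm's model info, or the 4096/1024 fallbacks.
print(llm.max_input_tokens, llm.max_output_tokens)

# get_token_count delegates to litellm.token_counter for the configured model.
messages = [{'role': 'user', 'content': 'List the files you would inspect first.'}]
print(llm.get_token_count(messages))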