Token counting and litellm provider customization #1421
Changes from 4 commits
@@ -1,4 +1,5 @@
from litellm import completion as litellm_completion
import litellm
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential
from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavailableError
from functools import partial
@@ -16,6 +17,9 @@
LLM_NUM_RETRIES = config.get(ConfigType.LLM_NUM_RETRIES)
LLM_RETRY_MIN_WAIT = config.get(ConfigType.LLM_RETRY_MIN_WAIT)
LLM_RETRY_MAX_WAIT = config.get(ConfigType.LLM_RETRY_MAX_WAIT)
LLM_MAX_INPUT_TOKENS = config.get(ConfigType.LLM_MAX_INPUT_TOKENS)
LLM_MAX_OUTPUT_TOKENS = config.get(ConfigType.LLM_MAX_OUTPUT_TOKENS)
LLM_CUSTOM_LLM_PROVIDER = config.get(ConfigType.LLM_CUSTOM_LLM_PROVIDER)


class LLM:
@@ -31,6 +35,9 @@ def __init__(self,
                 num_retries=LLM_NUM_RETRIES,
                 retry_min_wait=LLM_RETRY_MIN_WAIT,
                 retry_max_wait=LLM_RETRY_MAX_WAIT,
                 max_input_tokens=LLM_MAX_INPUT_TOKENS,
                 max_output_tokens=LLM_MAX_OUTPUT_TOKENS,
                 custom_llm_provider=LLM_CUSTOM_LLM_PROVIDER
                 ):
        """
        Args:
@@ -41,6 +48,9 @@ def __init__(self,
            num_retries (int, optional): The number of retries for API calls. Defaults to LLM_NUM_RETRIES.
            retry_min_wait (int, optional): The minimum time to wait between retries in seconds. Defaults to LLM_RETRY_MIN_WAIT.
            retry_max_wait (int, optional): The maximum time to wait between retries in seconds. Defaults to LLM_RETRY_MAX_WAIT.
            max_input_tokens (int, optional): The maximum number of input tokens to send to the LLM per task. Defaults to LLM_MAX_INPUT_TOKENS.
            max_output_tokens (int, optional): The maximum number of output tokens to receive from the LLM per task. Defaults to LLM_MAX_OUTPUT_TOKENS.
            custom_llm_provider (str, optional): A custom LLM provider name passed through to litellm. Defaults to LLM_CUSTOM_LLM_PROVIDER.

        Attributes:
            model_name (str): The name of the language model.
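For illustration, here is how the new arguments might be passed explicitly when constructing the wrapper, overriding the config defaults. This is a hypothetical sketch: the import path, the model keyword, and all values are assumptions based on the surrounding code, not part of this diff.

# Hypothetical usage sketch; import path, model name and values are illustrative only.
from opendevin.llm.llm import LLM

llm = LLM(
    model='gpt-3.5-turbo',
    max_input_tokens=8192,         # overrides LLM_MAX_INPUT_TOKENS from config
    max_output_tokens=2048,        # overrides LLM_MAX_OUTPUT_TOKENS from config
    custom_llm_provider='openai',  # overrides LLM_CUSTOM_LLM_PROVIDER from config
)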
@@ -54,9 +64,32 @@ def __init__(self,
        self.api_key = api_key
        self.base_url = base_url
        self.api_version = api_version
        self.max_input_tokens = max_input_tokens
        self.max_output_tokens = max_output_tokens
        self.custom_llm_provider = custom_llm_provider

        # litellm actually uses base Exception here for unknown model
        self.model_info = None
        try:
            self.model_info = litellm.get_model_info(self.model_name)
        # noinspection PyBroadException
        except Exception:
            logger.warning(f'Could not get model info for {self.model_name}')

        if self.max_input_tokens is None:
            if self.model_info is not None and 'max_input_tokens' in self.model_info:
                self.max_input_tokens = self.model_info['max_input_tokens']
            else:
                self.max_input_tokens = 4096

        if self.max_output_tokens is None:
            if self.model_info is not None and 'max_output_tokens' in self.model_info:
                self.max_output_tokens = self.model_info['max_output_tokens']
            else:
                self.max_output_tokens = 1024
Review comment on the 1024 default: Just curious: where does this number come from? I guess 4096 is because it's the limit of GPT 3.5, but how about this one?

Reply: I don't have a significant justification for either of these defaults, and I am interested to hear opinions on them. I regularly experienced overruns with a 512 output token limit, and therefore I usually use 1024 or higher locally.

Reply: I don't have a strong opinion either. I just feel like it would be better to have some comments explaining where these numbers are from.

Reply: I have added comments documenting this:

        # Max input tokens for gpt3.5, so this is a safe fallback for any potentially viable model
        self.max_input_tokens = 4096
        # Enough tokens for most output actions, and not too many for a bad llm to get carried away responding
        # with thousands of unwanted tokens
        self.max_output_tokens = 1024
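For context on the fallback logic above, litellm.get_model_info returns a metadata dict for models litellm knows about and raises for unknown ones, which is why the constructor wraps the lookup in try/except. A minimal inspection sketch (the model name is illustrative):

import litellm

# Minimal sketch: look up the limits the constructor falls back on.
try:
    info = litellm.get_model_info('gpt-3.5-turbo')
    print(info.get('max_input_tokens'), info.get('max_output_tokens'))
except Exception:
    print('model not known to litellm; the defaults of 4096/1024 would apply')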
        self._completion = partial(
            litellm_completion, model=self.model_name, api_key=self.api_key, base_url=self.base_url, api_version=self.api_version)
            litellm_completion, model=self.model_name, api_key=self.api_key, base_url=self.base_url, api_version=self.api_version, max_tokens=max_output_tokens, custom_llm_provider=custom_llm_provider)

        completion_unwrapped = self._completion
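The partial above pre-binds the new arguments onto every completion call. Roughly what an eventual self._completion(messages=...) expands to, as a stand-alone sketch; the model, key, base_url and provider values are illustrative assumptions, not taken from this diff.

from litellm import completion as litellm_completion

# Hypothetical stand-alone equivalent of the pre-bound call; all values illustrative.
response = litellm_completion(
    model='mistral-7b-instruct',
    messages=[{'role': 'user', 'content': 'Hello!'}],
    api_key='not-a-real-key',
    base_url='http://localhost:8000/v1',
    max_tokens=1024,               # max_output_tokens from the wrapper
    custom_llm_provider='openai',  # route the request through litellm's OpenAI-compatible path
)
print(response['choices'][0]['message']['content'])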
@@ -89,6 +122,18 @@ def completion(self):
        """
        return self._completion

    def get_token_count(self, messages):
        """
        Get the number of tokens in a list of messages.

        Args:
            messages (list): A list of messages.

        Returns:
            int: The number of tokens.
        """
        return litellm.token_counter(model=self.model_name, messages=messages)

    def __str__(self):
        if self.api_version:
            return f'LLM(model={self.model_name}, api_version={self.api_version}, base_url={self.base_url})'
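The new get_token_count method delegates to litellm's tokenizer-aware counter. A quick usage sketch of the underlying call (model and messages are illustrative):

import litellm

# litellm picks an appropriate tokenizer for known models and falls back to a default otherwise.
messages = [
    {'role': 'system', 'content': 'You are a helpful assistant.'},
    {'role': 'user', 'content': 'Summarize the last build log.'},
]
print(litellm.token_counter(model='gpt-3.5-turbo', messages=messages))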
Review comment: Is this roughly a similar number? 40 chars per token? That seems like a lot to me.

Reply: Ahh nvm--I see how it's being used differently.