OpenDevin · rbren · Apr 27, 2024
diff --git a/agenthub/monologue_agent/agent.py b/agenthub/monologue_agent/agent.py
@@ -77,7 +77,7 @@
  "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
  'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
  "OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
- "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here.",
+ 'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
 ]
 
 

diff --git a/agenthub/monologue_agent/utils/prompts.py b/agenthub/monologue_agent/utils/prompts.py
@@ -28,8 +28,8 @@
 
 
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
-What is your next thought or action? Your response must be in JSON format.
-It must be an object, and it must contain two fields:
+What is your next single thought or action? Your response must be in JSON format.
+It must be a single object, and it must contain two fields:
 * `action`, which is one of the actions below
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 
@@ -59,11 +59,15 @@
 actions are all "think" actions, you should consider taking a different action.
 
 Notes:
-* your environment is Debian Linux. You can install software with `apt`
-* your working directory will not change, even if you run `cd`. All commands will be run in the `%(WORKSPACE_MOUNT_PATH_IN_SANDBOX)s` directory.
+* you are logged in as %(user)s, but sudo will always work without a password.
+* all non-background commands will be forcibly stopped if they remain running for over %(timeout)s seconds.
+* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
+* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
+* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
+* whenever an action fails, always `think` about why it may have happened before acting again.
 
-What is your next thought or action? Again, you must reply with JSON, and only with JSON.
+What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
 
 %(hint)s
 """
@@ -142,11 +146,15 @@ def get_request_action_prompt(
  )
  bg_commands_message += '\nYou can end any process by sending a `kill` action with the numerical `id` above.'
 
+ user = 'opendevin' if config.get(ConfigType.RUN_AS_DEVIN) else 'root'
+
  return ACTION_PROMPT % {
  'task': task,
  'monologue': json.dumps(thoughts, indent=2),
  'background_commands': bg_commands_message,
  'hint': hint,
+ 'user': user,
+ 'timeout': config.get(ConfigType.SANDBOX_TIMEOUT),
  'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.get(ConfigType.WORKSPACE_MOUNT_PATH_IN_SANDBOX),
  }
 
@@ -181,6 +189,10 @@ def rank(match):
  raise LLMOutputError(
  'Invalid JSON, the response must be well-formed JSON as specified in the prompt.'
  )
+ except TypeError:
+ raise LLMOutputError(
+ 'Invalid JSON, the response must be well-formed JSON as specified in the prompt.'
+ )
  if 'content' in action_dict:
  # The LLM gets confused here. Might as well be robust
  action_dict['contents'] = action_dict.pop('content')

diff --git a/opendevin/action/fileop.py b/opendevin/action/fileop.py
@@ -20,10 +20,16 @@
 SANDBOX_PATH_PREFIX = '/workspace/'
 
 
-def resolve_path(file_path):
+def resolve_path(file_path, working_directory):
+ path_in_sandbox = Path(file_path)
+
+ # Apply working directory
+ if not path_in_sandbox.is_absolute():
+ path_in_sandbox = Path(working_directory) / path_in_sandbox
+
  # Sanitize the path with respect to the root of the full sandbox
- # (deny any .. path traversal to parent directories of this)
- abs_path_in_sandbox = (Path(SANDBOX_PATH_PREFIX) / Path(file_path)).resolve()
+ # (deny any .. path traversal to parent directories of the sandbox)
+ abs_path_in_sandbox = path_in_sandbox.resolve()
 
  # If the path is outside the workspace, deny it
  if not abs_path_in_sandbox.is_relative_to(SANDBOX_PATH_PREFIX):
@@ -71,7 +77,7 @@ async def run(self, controller) -> Observation:
  code_view = ''.join(read_lines)
  else:
  try:
- whole_path = resolve_path(self.path)
+ whole_path = resolve_path(self.path, controller.action_manager.sandbox.get_working_directory())
  self.start = max(self.start, 0)
  try:
  with open(whole_path, 'r', encoding='utf-8') as file:
@@ -123,7 +129,7 @@ async def run(self, controller) -> Observation:
  return AgentErrorObservation(f'File not found: {self.path}')
  else:
  try:
- whole_path = resolve_path(self.path)
+ whole_path = resolve_path(self.path, controller.action_manager.sandbox.get_working_directory())
  mode = 'w' if not os.path.exists(whole_path) else 'r+'
  try:
  with open(whole_path, mode, encoding='utf-8') as file:

diff --git a/opendevin/config.py b/opendevin/config.py
@@ -36,6 +36,8 @@
  ConfigType.LLM_RETRY_MIN_WAIT: 3,
  ConfigType.LLM_RETRY_MAX_WAIT: 60,
  ConfigType.MAX_ITERATIONS: 100,
+ ConfigType.LLM_TIMEOUT: None,
+ ConfigType.LLM_MAX_RETURN_TOKENS: None,
  ConfigType.AGENT_MEMORY_MAX_THREADS: 2,
  ConfigType.AGENT_MEMORY_ENABLED: False,
  # GPT-4 pricing is $10 per 1M input tokens. Since tokenization happens on LLM side,
@@ -48,6 +50,7 @@
  ConfigType.USE_HOST_NETWORK: 'false',
  ConfigType.SSH_HOSTNAME: 'localhost',
  ConfigType.DISABLE_COLOR: 'false',
+ ConfigType.SANDBOX_TIMEOUT: 120
 }
 
 config_str = ''

diff --git a/opendevin/controller/action_manager.py b/opendevin/controller/action_manager.py
@@ -26,15 +26,21 @@ def __init__(
  if sandbox_type == 'exec':
  self.sandbox = DockerExecBox(
  sid=(sid or 'default'),
+ timeout=config.get(ConfigType.SANDBOX_TIMEOUT)
  )
  elif sandbox_type == 'local':
- self.sandbox = LocalBox()
+ self.sandbox = LocalBox(
+ timeout=config.get(ConfigType.SANDBOX_TIMEOUT)
+ )
  elif sandbox_type == 'ssh':
  self.sandbox = DockerSSHBox(
- sid=(sid or 'default')
+ sid=(sid or 'default'),
+ timeout=config.get(ConfigType.SANDBOX_TIMEOUT)
  )
  elif sandbox_type == 'e2b':
- self.sandbox = E2BBox()
+ self.sandbox = E2BBox(
+ timeout=config.get(ConfigType.SANDBOX_TIMEOUT)
+ )
  else:
  raise ValueError(f'Invalid sandbox type: {sandbox_type}')
 

diff --git a/opendevin/llm/llm.py b/opendevin/llm/llm.py
@@ -4,10 +4,9 @@
 from functools import partial
 
 from opendevin import config
-from opendevin.schema.config import ConfigType
 from opendevin.logger import llm_prompt_logger, llm_response_logger
 from opendevin.logger import opendevin_logger as logger
-
+from opendevin.schema import ConfigType
 
 DEFAULT_API_KEY = config.get(ConfigType.LLM_API_KEY)
 DEFAULT_BASE_URL = config.get(ConfigType.LLM_BASE_URL)
@@ -16,6 +15,8 @@
 LLM_NUM_RETRIES = config.get(ConfigType.LLM_NUM_RETRIES)
 LLM_RETRY_MIN_WAIT = config.get(ConfigType.LLM_RETRY_MIN_WAIT)
 LLM_RETRY_MAX_WAIT = config.get(ConfigType.LLM_RETRY_MAX_WAIT)
+LLM_TIMEOUT = config.get(ConfigType.LLM_TIMEOUT)
+LLM_MAX_RETURN_TOKENS = config.get(ConfigType.LLM_MAX_RETURN_TOKENS)
 
 
 class LLM:
@@ -31,6 +32,8 @@ def __init__(self,
  num_retries=LLM_NUM_RETRIES,
  retry_min_wait=LLM_RETRY_MIN_WAIT,
  retry_max_wait=LLM_RETRY_MAX_WAIT,
+ llm_timeout=LLM_TIMEOUT,
+ llm_max_return_tokens=LLM_MAX_RETURN_TOKENS
  ):
  """
  Args:
@@ -41,6 +44,8 @@ def __init__(self,
  num_retries (int, optional): The number of retries for API calls. Defaults to LLM_NUM_RETRIES.
  retry_min_wait (int, optional): The minimum time to wait between retries in seconds. Defaults to LLM_RETRY_MIN_TIME.
  retry_max_wait (int, optional): The maximum time to wait between retries in seconds. Defaults to LLM_RETRY_MAX_TIME.
+ llm_timeout (int, optional): The maximum time to wait for a response in seconds. Defaults to LLM_TIMEOUT.
+ llm_max_return_tokens (int, optional): The maximum number of tokens to return. Defaults to LLM_MAX_RETURN_TOKENS.
 
  Attributes:
  model_name (str): The name of the language model.
@@ -54,9 +59,11 @@ def __init__(self,
  self.api_key = api_key
  self.base_url = base_url
  self.api_version = api_version
+ self.llm_timeout = llm_timeout
+ self.llm_max_return_tokens = llm_max_return_tokens
 
  self._completion = partial(
- litellm_completion, model=self.model_name, api_key=self.api_key, base_url=self.base_url, api_version=self.api_version)
+ litellm_completion, model=self.model_name, api_key=self.api_key, base_url=self.base_url, api_version=self.api_version, max_tokens=self.llm_max_return_tokens, timeout=self.llm_timeout)
 
  completion_unwrapped = self._completion
 

diff --git a/opendevin/sandbox/docker/exec_box.py b/opendevin/sandbox/docker/exec_box.py
@@ -268,6 +268,9 @@ def close(self):
  except docker.errors.NotFound:
  pass
 
+ def get_working_directory(self):  # base directory handed to callers for resolving relative paths
+ return SANDBOX_WORKSPACE_DIR  # returns the SANDBOX_WORKSPACE_DIR constant; the container is not queried
+
 
 if __name__ == '__main__':
  try:

diff --git a/opendevin/sandbox/docker/local_box.py b/opendevin/sandbox/docker/local_box.py
@@ -96,3 +96,6 @@ def close(self):
 
  def cleanup(self):
  self.close()
+
+ def get_working_directory(self):  # base directory handed to callers for resolving relative paths
+ return config.get(ConfigType.WORKSPACE_BASE)  # looked up from config on every call
diff --git a/opendevin/sandbox/docker/ssh_box.py b/opendevin/sandbox/docker/ssh_box.py
@@ -169,7 +169,7 @@ def setup_user(self):
 
  def start_ssh_session(self):
  # start ssh session at the background
- self.ssh = pxssh.pxssh()
+ self.ssh = pxssh.pxssh(echo=False)
  hostname = SSH_HOSTNAME
  if RUN_AS_DEVIN:
  username = 'opendevin'
@@ -211,49 +211,14 @@ def execute(self, cmd: str) -> Tuple[int, str]:
  # send a SIGINT to the process
  self.ssh.sendintr()
  self.ssh.prompt()
- command_output = self.ssh.before.decode(
- 'utf-8').lstrip(cmd).strip()
+ command_output = self.ssh.before.decode('utf-8').strip()
  return -1, f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}'
  command_output = self.ssh.before.decode('utf-8').strip()
 
- # NOTE: there's some weird behavior with the prompt (it may come AFTER the command output)
- # so we need to check if the command is in the output
- n_tries = 5
- while not command_output.startswith(cmd) and n_tries > 0:
- self.ssh.prompt()
- command_output = self.ssh.before.decode('utf-8').strip()
- time.sleep(0.5)
- n_tries -= 1
- if n_tries == 0 and not command_output.startswith(cmd):
- raise Exception(
- f'Something went wrong with the SSH sanbox, cannot get output for command [{cmd}] after 5 retries'
- )
- logger.debug(f'Command output GOT SO FAR: {command_output}')
- # once out, make sure that we have *every* output, we while loop until we get an empty output
- while True:
- logger.debug('WAITING FOR .prompt()')
- self.ssh.sendline('\n')
- timeout_not_reached = self.ssh.prompt(timeout=1)
- if not timeout_not_reached:
- logger.debug('TIMEOUT REACHED')
- break
- logger.debug('WAITING FOR .before')
- output = self.ssh.before.decode('utf-8').strip()
- logger.debug(f'WAITING FOR END OF command output ({bool(output)}): {output}')
- if output == '':
- break
- command_output += output
- command_output = command_output.lstrip(cmd).strip()
-
  # get the exit code
  self.ssh.sendline('echo $?')
- self.ssh.prompt()
- exit_code = self.ssh.before.decode('utf-8')
- while not exit_code.startswith('echo $?'):
- self.ssh.prompt()
- exit_code = self.ssh.before.decode('utf-8')
- logger.debug(f'WAITING FOR exit code: {exit_code}')
- exit_code = int(exit_code.lstrip('echo $?').strip())
+ self.ssh.prompt(timeout=10)
+ exit_code = int(self.ssh.before.decode('utf-8').strip())
  return exit_code, command_output
 
  def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
@@ -337,6 +302,11 @@ def stop_docker_container(self):
  except docker.errors.NotFound:
  pass
 
+ def get_working_directory(self):  # reports the directory printed by `pwd` in the SSH session
+ self.ssh.sendline('pwd')  # ask the SSH shell for its current directory
+ self.ssh.prompt(timeout=10)  # NOTE(review): return value is ignored; if this times out, `before` may not hold pwd's output - confirm
+ return self.ssh.before.decode('utf-8').strip()  # `before` buffer decoded as UTF-8 with surrounding whitespace trimmed
+
  def is_container_running(self):
  try:
  container = self.docker_client.containers.get(self.container_name)

diff --git a/opendevin/sandbox/e2b/sandbox.py b/opendevin/sandbox/e2b/sandbox.py
@@ -124,3 +124,6 @@ def kill_background(self, process_id: int):
 
  def close(self):
  self.sandbox.close()
+
+ def get_working_directory(self):  # base directory handed to callers for resolving relative paths
+ return self.sandbox.cwd  # delegates to the wrapped sandbox object's `cwd` attribute
diff --git a/opendevin/sandbox/sandbox.py b/opendevin/sandbox/sandbox.py
@@ -32,3 +32,7 @@ def close(self):
  @abstractmethod
  def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
  pass
+
+ @abstractmethod
+ def get_working_directory(self):  # file read/write actions pass the result to resolve_path as the base for relative paths
+ pass  # no default behaviour; each sandbox implementation supplies its own
diff --git a/opendevin/schema/config.py b/opendevin/schema/config.py
@@ -2,6 +2,8 @@
 
 
 class ConfigType(str, Enum):
+ LLM_MAX_RETURN_TOKENS = 'LLM_MAX_RETURN_TOKENS'
+ LLM_TIMEOUT = 'LLM_TIMEOUT'
  LLM_API_KEY = 'LLM_API_KEY'
  LLM_BASE_URL = 'LLM_BASE_URL'
  WORKSPACE_BASE = 'WORKSPACE_BASE'
@@ -26,6 +28,7 @@ class ConfigType(str, Enum):
  E2B_API_KEY = 'E2B_API_KEY'
  SANDBOX_TYPE = 'SANDBOX_TYPE'
  SANDBOX_USER_ID = 'SANDBOX_USER_ID'
+ SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT'
  USE_HOST_NETWORK = 'USE_HOST_NETWORK'
  SSH_HOSTNAME = 'SSH_HOSTNAME'
  DISABLE_COLOR = 'DISABLE_COLOR'
diff --git a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log
@@ -281,15 +281,15 @@ This is your internal monologue, in JSON format:
  {
  "action": "think",
  "args": {
- "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here."
+ "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself."
  }
  }
 ]
 
 
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
-What is your next thought or action? Your response must be in JSON format.
-It must be an object, and it must contain two fields:
+What is your next single thought or action? Your response must be in JSON format.
+It must be a single object, and it must contain two fields:
 * `action`, which is one of the actions below
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 
@@ -319,10 +319,14 @@ You should never act twice in a row without thinking. But if your last several
 actions are all "think" actions, you should consider taking a different action.
 
 Notes:
-* your environment is Debian Linux. You can install software with `apt`
-* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory.
+* you are logged in as opendevin, but sudo will always work without a password.
+* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
+* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
+* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
+* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
+* whenever an action fails, always `think` about why it may have happened before acting again.
 
-What is your next thought or action? Again, you must reply with JSON, and only with JSON.
+What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
 
 You've been thinking a lot lately. Maybe it's time to take action?
diff --git a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log
@@ -281,7 +281,7 @@ This is your internal monologue, in JSON format:
  {
  "action": "think",
  "args": {
- "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here."
+ "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself."
  }
  },
  {
@@ -304,8 +304,8 @@ This is your internal monologue, in JSON format:
 
 
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
-What is your next thought or action? Your response must be in JSON format.
-It must be an object, and it must contain two fields:
+What is your next single thought or action? Your response must be in JSON format.
+It must be a single object, and it must contain two fields:
 * `action`, which is one of the actions below
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 
@@ -335,8 +335,12 @@ You should never act twice in a row without thinking. But if your last several
 actions are all "think" actions, you should consider taking a different action.
 
 Notes:
-* your environment is Debian Linux. You can install software with `apt`
-* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory.
+* you are logged in as opendevin, but sudo will always work without a password.
+* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
+* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
+* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
+* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
+* whenever an action fails, always `think` about why it may have happened before acting again.
 
-What is your next thought or action? Again, you must reply with JSON, and only with JSON.
+What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.