bigscience-workshop · borzunov · Sep 28, 2023
diff --git a/src/petals/client/config.py b/src/petals/client/config.py
@@ -27,7 +27,7 @@ class ClientConfig:
 
  max_retries: Optional[int] = DEFAULT_MAX_RETRIES # max number of retries before an exception (default: inf)
  min_backoff: float = 1 # after a repeated failure, sleep for this many seconds times 2 ** (num_failures - 1)
- max_backoff: float = 60 # limit maximal sleep time between retries to this value
+ max_backoff: float = 5 # limit maximal sleep time between retries to this value
  ban_timeout: float = 15 # when a remote peer fails to respond, prevent routing to that peer for this many seconds
  active_adapter: Optional[str] = None # name of active LoRA adapter (usually, Hugging Face repo)
 

diff --git a/src/petals/client/inference_session.py b/src/petals/client/inference_session.py
@@ -144,6 +144,12 @@ def step(
  )
  )
  )
+
+ import random
+
+ if random.random() < 0.05:
+ raise Exception("fail")
+
  outputs = list(map(deserialize_torch_tensor, outputs_serialized.tensors))
  assert (
  outputs[0].shape == inputs.shape