Wd/audio (#962)
* rework navigation

* wip

* fix buffering

* finalizing audio feature

* fix lint

* update changelog
willydouhard committed May 6, 2024
1 parent 8a59f43 commit 329e14a
Showing 76 changed files with 1,484 additions and 1,344 deletions.
21 changes: 21 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,27 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

Nothing unreleased!

## [1.1.0rc0] - 2024-05-06

### Added

- `cl.on_audio_chunk` decorator to process the user's incoming audio stream
- `cl.on_audio_end` decorator to react to the end of the user's audio stream
- The `cl.Audio` element now has an `auto_play` property
- `http_referer` is now available in `cl.user_session`

### Changed

- The UI has been revamped, especially the navigation
- The arrow up button has been removed from the input bar; pressing the up arrow key still opens the last-inputs menu
- **[breaking]** The `send()` method on `cl.Message` now returns the message instead of the message id
- **[breaking]** The `multi_modal` feature has been renamed to `spontaneous_file_upload` in the config
- The element `display` property now defaults to `inline` instead of `side`

### Fixed

- Stopping a task should now work more reliably (using asyncio `task.cancel()`)

## [1.0.506] - 2024-04-30

### Added
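The `http_referer` value mentioned above can be read like any other `cl.user_session` entry; a minimal sketch (the key name comes from the changelog, the message text is illustrative):

```python
import chainlit as cl


@cl.on_chat_start
async def start():
    # The page that referred the user to the app, if the browser sent one.
    referer = cl.user_session.get("http_referer")
    await cl.Message(content=f"You arrived from: {referer}").send()
```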
35 changes: 34 additions & 1 deletion backend/chainlit/__init__.py
@@ -52,7 +52,7 @@
from chainlit.step import Step, step
from chainlit.sync import make_async, run_sync
from chainlit.telemetry import trace
from chainlit.types import ChatProfile, ThreadDict
from chainlit.types import AudioChunk, ChatProfile, ThreadDict
from chainlit.user import PersistedUser, User
from chainlit.user_session import user_session
from chainlit.utils import make_module_getattr, wrap_user_function
@@ -224,6 +224,38 @@ def on_chat_end(func: Callable) -> Callable:
return func


@trace
def on_audio_chunk(func: Callable) -> Callable:
"""
Hook to react to the audio chunks being sent.
Args:
chunk (AudioChunk): The audio chunk being sent.
Returns:
Callable[[AudioChunk], Any]: The decorated hook.
"""

config.code.on_audio_chunk = wrap_user_function(func, with_task=False)
return func


@trace
def on_audio_end(func: Callable) -> Callable:
"""
Hook to react to the audio stream ending. This is called after the last audio chunk is sent.
Args:
elements (List[ElementBased]): The files that were uploaded before starting the audio stream (if any).
Returns:
Callable[[List[ElementBased]], Any]: The decorated hook.
"""

config.code.on_audio_end = wrap_user_function(func, with_task=True)
return func


@trace
def author_rename(func: Callable[[str], str]) -> Callable[[str], str]:
"""
@@ -318,6 +350,7 @@ def acall(self):
__all__ = [
"user_session",
"CopilotFunction",
"AudioChunk",
"Action",
"User",
"PersistedUser",
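The two new decorators in this file (`on_audio_chunk`, `on_audio_end`) can be wired up roughly as follows. This is a minimal sketch, assuming `AudioChunk` exposes `isStart` and `data` fields and that chunks are simply buffered in `cl.user_session`; the buffering strategy and the reply text are illustrative, not part of this commit:

```python
import chainlit as cl
from chainlit.types import AudioChunk


@cl.on_audio_chunk
async def on_audio_chunk(chunk: AudioChunk):
    # Start a fresh buffer on the first chunk, then append the raw bytes.
    if chunk.isStart:
        cl.user_session.set("audio_buffer", bytearray())
    cl.user_session.get("audio_buffer").extend(chunk.data)


@cl.on_audio_end
async def on_audio_end(elements):
    # `elements` holds any files uploaded before recording started (if any).
    audio = bytes(cl.user_session.get("audio_buffer") or b"")
    await cl.Message(
        content=f"Received {len(audio)} bytes of audio.",
        elements=elements,
    ).send()
```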
58 changes: 39 additions & 19 deletions backend/chainlit/config.py
@@ -16,7 +16,9 @@

if TYPE_CHECKING:
from chainlit.action import Action
from chainlit.types import ChatProfile, ThreadDict
from chainlit.element import ElementBased
from chainlit.message import Message
from chainlit.types import AudioChunk, ChatProfile, ThreadDict
from chainlit.user import User
from fastapi import Request, Response

@@ -71,18 +73,26 @@
# Automatically tag threads with the current chat profile (if a chat profile is used)
auto_tag_thread = true
# Authorize users to upload files with messages
[features.multi_modal]
# Authorize users to spontaneously upload files with messages
[features.spontaneous_file_upload]
enabled = true
accept = ["*/*"]
max_files = 20
max_size_mb = 500
# Allows user to use speech to text
[features.speech_to_text]
enabled = false
# See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
# language = "en-US"
[features.audio]
# Threshold for audio recording
min_decibels = -45
# Delay for the user to start speaking in MS
initial_silence_timeout = 3000
# Delay for the user to continue speaking in MS. If the user stops speaking for this duration, the recording will stop.
silence_timeout = 1500
# Above this duration (MS), the recording will forcefully stop.
max_duration = 15000
# Duration of the audio chunks in MS
chunk_duration = 1000
# Sample rate of the audio
sample_rate = 44100
[UI]
# Name of the app and chatbot.
@@ -189,26 +199,31 @@ class Theme(DataClassJsonMixin):


@dataclass
class SpeechToTextFeature:
enabled: Optional[bool] = None
language: Optional[str] = None


@dataclass
class MultiModalFeature:
class SpontaneousFileUploadFeature(DataClassJsonMixin):
enabled: Optional[bool] = None
accept: Optional[Union[List[str], Dict[str, List[str]]]] = None
max_files: Optional[int] = None
max_size_mb: Optional[int] = None


@dataclass
class AudioFeature(DataClassJsonMixin):
min_decibels: int = -45
initial_silence_timeout: int = 2000
silence_timeout: int = 1500
chunk_duration: int = 1000
max_duration: int = 15000
sample_rate: int = 44100
enabled: bool = False


@dataclass()
class FeaturesSettings(DataClassJsonMixin):
prompt_playground: bool = True
multi_modal: Optional[MultiModalFeature] = None
spontaneous_file_upload: Optional[SpontaneousFileUploadFeature] = None
audio: Optional[AudioFeature] = Field(default_factory=AudioFeature)
latex: bool = False
unsafe_allow_html: bool = False
speech_to_text: Optional[SpeechToTextFeature] = None
auto_tag_thread: bool = True


@@ -247,7 +262,10 @@ class CodeSettings:
on_chat_start: Optional[Callable[[], Any]] = None
on_chat_end: Optional[Callable[[], Any]] = None
on_chat_resume: Optional[Callable[["ThreadDict"], Any]] = None
on_message: Optional[Callable[[str], Any]] = None
on_message: Optional[Callable[["Message"], Any]] = None
on_audio_chunk: Optional[Callable[["AudioChunk"], Any]] = None
on_audio_end: Optional[Callable[[List["ElementBased"]], Any]] = None

author_rename: Optional[Callable[[str], str]] = None
on_settings_update: Optional[Callable[[Dict[str, Any]], Any]] = None
set_chat_profiles: Optional[Callable[[Optional["User"]], List["ChatProfile"]]] = (
@@ -413,11 +431,13 @@ def load_settings():

ui_settings = UISettings(**ui_settings)

code_settings = CodeSettings(action_callbacks={})

return {
"features": features_settings,
"ui": ui_settings,
"project": project_settings,
"code": CodeSettings(action_callbacks={}),
"code": code_settings,
}


6 changes: 4 additions & 2 deletions backend/chainlit/data/__init__.py
@@ -156,6 +156,7 @@ def attachment_to_element_dict(self, attachment: Attachment) -> "ElementDict":
"chainlitKey": None,
"display": metadata.get("display", "side"),
"language": metadata.get("language"),
"autoPlay": metadata.get("autoPlay", None),
"page": metadata.get("page"),
"size": metadata.get("size"),
"type": metadata.get("type", "file"),
@@ -219,7 +220,7 @@ def step_to_step_dict(self, step: LiteralStep) -> "StepDict":
"disableFeedback": metadata.get("disableFeedback", False),
"indent": metadata.get("indent"),
"language": metadata.get("language"),
"isError": metadata.get("isError", False),
"isError": bool(step.error),
"waitForAnswer": metadata.get("waitForAnswer", False),
}

@@ -348,7 +349,6 @@ async def create_step(self, step_dict: "StepDict"):
step_dict.get("metadata", {}),
**{
"disableFeedback": step_dict.get("disableFeedback"),
"isError": step_dict.get("isError"),
"waitForAnswer": step_dict.get("waitForAnswer"),
"language": step_dict.get("language"),
"showInput": step_dict.get("showInput"),
@@ -372,6 +372,8 @@ async def create_step(self, step_dict: "StepDict"):
step["input"] = {"content": step_dict.get("input")}
if step_dict.get("output"):
step["output"] = {"content": step_dict.get("output")}
if step_dict.get("isError"):
step["error"] = step_dict.get("output")

await self.client.api.send_steps([step])

31 changes: 19 additions & 12 deletions backend/chainlit/data/sql_alchemy.py
@@ -170,12 +170,14 @@ async def update_thread(
raise ValueError("User not found in session context")
data = {
"id": thread_id,
"createdAt": await self.get_current_timestamp()
if metadata is None
else None,
"name": name
if name is not None
else (metadata.get("name") if metadata and "name" in metadata else None),
"createdAt": (
await self.get_current_timestamp() if metadata is None else None
),
"name": (
name
if name is not None
else (metadata.get("name") if metadata and "name" in metadata else None)
),
"userId": user_id,
"userIdentifier": user_identifier,
"tags": tags,
@@ -552,13 +554,17 @@ async def get_all_user_threads(
streaming=step_feedback.get("step_streaming", False),
waitForAnswer=step_feedback.get("step_waitforanswer"),
isError=step_feedback.get("step_iserror"),
metadata=step_feedback["step_metadata"]
if step_feedback.get("step_metadata") is not None
else {},
metadata=(
step_feedback["step_metadata"]
if step_feedback.get("step_metadata") is not None
else {}
),
tags=step_feedback.get("step_tags"),
input=step_feedback.get("step_input", "")
if step_feedback["step_showinput"]
else "",
input=(
step_feedback.get("step_input", "")
if step_feedback["step_showinput"]
else ""
),
output=step_feedback.get("step_output", ""),
createdAt=step_feedback.get("step_createdat"),
start=step_feedback.get("step_start"),
@@ -587,6 +593,7 @@ async def get_all_user_threads(
display=element["element_display"],
size=element.get("element_size"),
language=element.get("element_language"),
autoPlay=element.get("element_autoPlay"),
page=element.get("element_page"),
forId=element.get("element_forid"),
mime=element.get("element_mime"),
5 changes: 4 additions & 1 deletion backend/chainlit/element.py
@@ -38,6 +38,7 @@ class ElementDict(TypedDict):
size: Optional[ElementSize]
language: Optional[str]
page: Optional[int]
autoPlay: Optional[bool]
forId: Optional[str]
mime: Optional[str]

@@ -61,7 +62,7 @@ class Element:
# The byte content of the element.
content: Optional[Union[bytes, str]] = None
# Controls how the element should be displayed in the UI. Choices are "inline" (default), "side", or "page".
display: ElementDisplay = Field(default="side")
display: ElementDisplay = Field(default="inline")
# Controls element size
size: Optional[ElementSize] = None
# The ID of the message this element is associated with.
@@ -93,6 +94,7 @@ def to_dict(self) -> ElementDict:
"objectKey": getattr(self, "object_key", None),
"size": getattr(self, "size", None),
"page": getattr(self, "page", None),
"autoPlay": getattr(self, "auto_play", None),
"language": getattr(self, "language", None),
"forId": getattr(self, "for_id", None),
"mime": getattr(self, "mime", None),
@@ -306,6 +308,7 @@ async def preprocess_content(self):
@dataclass
class Audio(Element):
type: ClassVar[ElementType] = "audio"
auto_play: bool = False


@dataclass
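A short sketch of the new `auto_play` flag on the `Audio` element; the file path, element name, and message content are placeholders:

```python
import chainlit as cl


@cl.on_chat_start
async def start():
    # auto_play=True asks the UI to start playback as soon as the element renders.
    audio = cl.Audio(name="greeting", path="./greeting.mp3", auto_play=True)
    await cl.Message(
        content="Here is a spoken greeting.",
        elements=[audio],
    ).send()
```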
13 changes: 7 additions & 6 deletions backend/chainlit/llama_index/callbacks.py
@@ -70,7 +70,7 @@
) -> str:
"""Run when an event starts and return id of event."""
self._restore_context()

step_type: StepType = "undefined"
if event_type == CBEventType.RETRIEVE:
step_type = "retrieval"
@@ -104,7 +104,6 @@ def on_event_end(
"""Run when an event ends."""
step = self.steps.get(event_id, None)


if payload is None or step is None:
return

@@ -117,11 +116,13 @@
source_nodes = getattr(response, "source_nodes", None)
if source_nodes:
source_refs = ", ".join(
[f"Source {idx}" for idx, _ in enumerate(source_nodes)])
[f"Source {idx}" for idx, _ in enumerate(source_nodes)]
)
step.elements = [
Text(
name=f"Source {idx}",
content=source.text or "Empty node",
display="side",
)
for idx, source in enumerate(source_nodes)
]
@@ -137,6 +138,7 @@
step.elements = [
Text(
name=f"Source {idx}",
display="side",
content=source.node.get_text() or "Empty node",
)
for idx, source in enumerate(sources)
@@ -173,7 +175,7 @@ def on_event_end(
token_count = self.total_llm_token_count or None
raw_response = response.raw if response else None
model = raw_response.get("model", None) if raw_response else None

if messages and isinstance(response, ChatResponse):
msg: ChatMessage = response.message
step.generation = ChatGeneration(
@@ -198,12 +200,11 @@
else:
step.output = payload
self.context.loop.create_task(step.update())

self.steps.pop(event_id, None)

def _noop(self, *args, **kwargs):
pass

start_trace = _noop
end_trace = _noop
