Wd/audio (#962)
* rework navigation

* wip

* fix buffering

* finalizing audio feature

* fix lint

* update changelog
willydouhard committed May 6, 2024
1 parent 8a59f43 commit 329e14a
Showing 76 changed files with 1,484 additions and 1,344 deletions.
21 changes: 21 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,27 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

Nothing unreleased!

## [1.1.0rc0] - 2024-05-06

### Added

- `cl.on_audio_chunk` decorator to process the user's incoming audio stream
- `cl.on_audio_end` decorator to react to the end of the user's audio stream
- The `cl.Audio` element now has an `auto_play` property
- `http_referer` is now available in `cl.user_session`

### Changed

- The UI has been revamped, especially the navigation
- The arrow up button has been removed from the input bar; pressing the up arrow key still opens the last-inputs menu
- **[breaking]** The `send()` method on `cl.Message` now returns the message instead of the message id
- **[breaking]** The `multi_modal` feature has been renamed to `spontaneous_file_upload` in the config
- The element `display` property now defaults to `inline` instead of `side`

### Fixed

- Stopping a task should now work more reliably (using asyncio `task.cancel()`)

## [1.0.506] - 2024-04-30

### Added
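The `http_referer` value mentioned above can be read like any other `cl.user_session` entry; a minimal sketch (the key name comes from the changelog, the message text is illustrative):

```python
import chainlit as cl


@cl.on_chat_start
async def start():
    # The page that referred the user to the app, if the browser sent one.
    referer = cl.user_session.get("http_referer")
    await cl.Message(content=f"You arrived from: {referer}").send()
```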
35 changes: 34 additions & 1 deletion backend/chainlit/__init__.py
@@ -52,7 +52,7 @@
from chainlit.step import Step, step
from chainlit.sync import make_async, run_sync
from chainlit.telemetry import trace
from chainlit.types import ChatProfile, ThreadDict
from chainlit.types import AudioChunk, ChatProfile, ThreadDict
from chainlit.user import PersistedUser, User
from chainlit.user_session import user_session
from chainlit.utils import make_module_getattr, wrap_user_function
@@ -224,6 +224,38 @@ def on_chat_end(func: Callable) -> Callable:
return func


@trace
def on_audio_chunk(func: Callable) -> Callable:
"""
Hook to react to the audio chunks being sent.
Args:
chunk (AudioChunk): The audio chunk being sent.
Returns:
Callable[[AudioChunk], Any]: The decorated hook.
"""

config.code.on_audio_chunk = wrap_user_function(func, with_task=False)
return func


@trace
def on_audio_end(func: Callable) -> Callable:
"""
Hook to react to the audio stream ending. This is called after the last audio chunk is sent.
Args:
elements (List[ElementBased]): The files that were uploaded before starting the audio stream (if any).
Returns:
Callable[[List[ElementBased]], Any]: The decorated hook.
"""

config.code.on_audio_end = wrap_user_function(func, with_task=True)
return func


@trace
def author_rename(func: Callable[[str], str]) -> Callable[[str], str]:
"""
@@ -318,6 +350,7 @@ def acall(self):
__all__ = [
"user_session",
"CopilotFunction",
"AudioChunk",
"Action",
"User",
"PersistedUser",
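The two new decorators in this file (`on_audio_chunk`, `on_audio_end`) can be wired up roughly as follows. This is a minimal sketch, assuming `AudioChunk` exposes `isStart` and `data` fields and that chunks are simply buffered in `cl.user_session`; the buffering strategy and the reply text are illustrative, not part of this commit:

```python
import chainlit as cl
from chainlit.types import AudioChunk


@cl.on_audio_chunk
async def on_audio_chunk(chunk: AudioChunk):
    # Start a fresh buffer on the first chunk, then append the raw bytes.
    if chunk.isStart:
        cl.user_session.set("audio_buffer", bytearray())
    cl.user_session.get("audio_buffer").extend(chunk.data)


@cl.on_audio_end
async def on_audio_end(elements):
    # `elements` holds any files uploaded before recording started (if any).
    audio = bytes(cl.user_session.get("audio_buffer") or b"")
    await cl.Message(
        content=f"Received {len(audio)} bytes of audio.",
        elements=elements,
    ).send()
```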
58 changes: 39 additions & 19 deletions backend/chainlit/config.py
@@ -16,7 +16,9 @@

if TYPE_CHECKING:
from chainlit.action import Action
from chainlit.types import ChatProfile, ThreadDict
from chainlit.element import ElementBased
from chainlit.message import Message
from chainlit.types import AudioChunk, ChatProfile, ThreadDict
from chainlit.user import User
from fastapi import Request, Response

@@ -71,18 +73,26 @@
# Automatically tag threads with the current chat profile (if a chat profile is used)
auto_tag_thread = true
# Authorize users to upload files with messages
[features.multi_modal]
# Authorize users to spontaneously upload files with messages
[features.spontaneous_file_upload]
enabled = true
accept = ["*/*"]
max_files = 20
max_size_mb = 500
# Allows user to use speech to text
[features.speech_to_text]
enabled = false
# See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
# language = "en-US"
[features.audio]
# Threshold for audio recording
min_decibels = -45
# Delay for the user to start speaking in MS
initial_silence_timeout = 3000
# Delay for the user to continue speaking in MS. If the user stops speaking for this duration, the recording will stop.
silence_timeout = 1500
# Above this duration (MS), the recording will forcefully stop.
max_duration = 15000
# Duration of the audio chunks in MS
chunk_duration = 1000
# Sample rate of the audio
sample_rate = 44100
[UI]
# Name of the app and chatbot.
@@ -189,26 +199,31 @@ class Theme(DataClassJsonMixin):


@dataclass
class SpeechToTextFeature:
enabled: Optional[bool] = None
language: Optional[str] = None


@dataclass
class MultiModalFeature:
class SpontaneousFileUploadFeature(DataClassJsonMixin):
enabled: Optional[bool] = None
accept: Optional[Union[List[str], Dict[str, List[str]]]] = None
max_files: Optional[int] = None
max_size_mb: Optional[int] = None


@dataclass
class AudioFeature(DataClassJsonMixin):
min_decibels: int = -45
initial_silence_timeout: int = 2000
silence_timeout: int = 1500
chunk_duration: int = 1000
max_duration: int = 15000
sample_rate: int = 44100
enabled: bool = False


@dataclass()
class FeaturesSettings(DataClassJsonMixin):
prompt_playground: bool = True
multi_modal: Optional[MultiModalFeature] = None
spontaneous_file_upload: Optional[SpontaneousFileUploadFeature] = None
audio: Optional[AudioFeature] = Field(default_factory=AudioFeature)
latex: bool = False
unsafe_allow_html: bool = False
speech_to_text: Optional[SpeechToTextFeature] = None
auto_tag_thread: bool = True


@@ -247,7 +262,10 @@ class CodeSettings:
on_chat_start: Optional[Callable[[], Any]] = None
on_chat_end: Optional[Callable[[], Any]] = None
on_chat_resume: Optional[Callable[["ThreadDict"], Any]] = None
on_message: Optional[Callable[[str], Any]] = None
on_message: Optional[Callable[["Message"], Any]] = None
on_audio_chunk: Optional[Callable[["AudioChunk"], Any]] = None
on_audio_end: Optional[Callable[[List["ElementBased"]], Any]] = None

author_rename: Optional[Callable[[str], str]] = None
on_settings_update: Optional[Callable[[Dict[str, Any]], Any]] = None
set_chat_profiles: Optional[Callable[[Optional["User"]], List["ChatProfile"]]] = (
@@ -413,11 +431,13 @@ def load_settings():

ui_settings = UISettings(**ui_settings)

code_settings = CodeSettings(action_callbacks={})

return {
"features": features_settings,
"ui": ui_settings,
"project": project_settings,
"code": CodeSettings(action_callbacks={}),
"code": code_settings,
}


6 changes: 4 additions & 2 deletions backend/chainlit/data/__init__.py
@@ -156,6 +156,7 @@ def attachment_to_element_dict(self, attachment: Attachment) -> "ElementDict":
"chainlitKey": None,
"display": metadata.get("display", "side"),
"language": metadata.get("language"),
"autoPlay": metadata.get("autoPlay", None),
"page": metadata.get("page"),
"size": metadata.get("size"),
"type": metadata.get("type", "file"),
@@ -219,7 +220,7 @@ def step_to_step_dict(self, step: LiteralStep) -> "StepDict":
"disableFeedback": metadata.get("disableFeedback", False),
"indent": metadata.get("indent"),
"language": metadata.get("language"),
"isError": metadata.get("isError", False),
"isError": bool(step.error),
"waitForAnswer": metadata.get("waitForAnswer", False),
}

@@ -348,7 +349,6 @@ async def create_step(self, step_dict: "StepDict"):
step_dict.get("metadata", {}),
**{
"disableFeedback": step_dict.get("disableFeedback"),
"isError": step_dict.get("isError"),
"waitForAnswer": step_dict.get("waitForAnswer"),
"language": step_dict.get("language"),
"showInput": step_dict.get("showInput"),
@@ -372,6 +372,8 @@ async def create_step(self, step_dict: "StepDict"):
step["input"] = {"content": step_dict.get("input")}
if step_dict.get("output"):
step["output"] = {"content": step_dict.get("output")}
if step_dict.get("isError"):
step["error"] = step_dict.get("output")

await self.client.api.send_steps([step])

31 changes: 19 additions & 12 deletions backend/chainlit/data/sql_alchemy.py
@@ -170,12 +170,14 @@ async def update_thread(
raise ValueError("User not found in session context")
data = {
"id": thread_id,
"createdAt": await self.get_current_timestamp()
if metadata is None
else None,
"name": name
if name is not None
else (metadata.get("name") if metadata and "name" in metadata else None),
"createdAt": (
await self.get_current_timestamp() if metadata is None else None
),
"name": (
name
if name is not None
else (metadata.get("name") if metadata and "name" in metadata else None)
),
"userId": user_id,
"userIdentifier": user_identifier,
"tags": tags,
@@ -552,13 +554,17 @@ async def get_all_user_threads(
streaming=step_feedback.get("step_streaming", False),
waitForAnswer=step_feedback.get("step_waitforanswer"),
isError=step_feedback.get("step_iserror"),
metadata=step_feedback["step_metadata"]
if step_feedback.get("step_metadata") is not None
else {},
metadata=(
step_feedback["step_metadata"]
if step_feedback.get("step_metadata") is not None
else {}
),
tags=step_feedback.get("step_tags"),
input=step_feedback.get("step_input", "")
if step_feedback["step_showinput"]
else "",
input=(
step_feedback.get("step_input", "")
if step_feedback["step_showinput"]
else ""
),
output=step_feedback.get("step_output", ""),
createdAt=step_feedback.get("step_createdat"),
start=step_feedback.get("step_start"),
@@ -587,6 +593,7 @@ async def get_all_user_threads(
display=element["element_display"],
size=element.get("element_size"),
language=element.get("element_language"),
autoPlay=element.get("element_autoPlay"),
page=element.get("element_page"),
forId=element.get("element_forid"),
mime=element.get("element_mime"),
5 changes: 4 additions & 1 deletion backend/chainlit/element.py
@@ -38,6 +38,7 @@ class ElementDict(TypedDict):
size: Optional[ElementSize]
language: Optional[str]
page: Optional[int]
autoPlay: Optional[bool]
forId: Optional[str]
mime: Optional[str]

@@ -61,7 +62,7 @@ class Element:
# The byte content of the element.
content: Optional[Union[bytes, str]] = None
# Controls how the element should be displayed in the UI. Choices are "inline" (default), "side", or "page".
display: ElementDisplay = Field(default="side")
display: ElementDisplay = Field(default="inline")
# Controls element size
size: Optional[ElementSize] = None
# The ID of the message this element is associated with.
@@ -93,6 +94,7 @@ def to_dict(self) -> ElementDict:
"objectKey": getattr(self, "object_key", None),
"size": getattr(self, "size", None),
"page": getattr(self, "page", None),
"autoPlay": getattr(self, "auto_play", None),
"language": getattr(self, "language", None),
"forId": getattr(self, "for_id", None),
"mime": getattr(self, "mime", None),
@@ -306,6 +308,7 @@ async def preprocess_content(self):
@dataclass
class Audio(Element):
type: ClassVar[ElementType] = "audio"
auto_play: bool = False


@dataclass
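A short sketch of the new `auto_play` flag on the `Audio` element; the file path, element name, and message content are placeholders:

```python
import chainlit as cl


@cl.on_chat_start
async def start():
    # auto_play=True asks the UI to start playback as soon as the element renders.
    audio = cl.Audio(name="greeting", path="./greeting.mp3", auto_play=True)
    await cl.Message(
        content="Here is a spoken greeting.",
        elements=[audio],
    ).send()
```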
13 changes: 7 additions & 6 deletions backend/chainlit/llama_index/callbacks.py
@@ -70,7 +70,7 @@
) -> str:
"""Run when an event starts and return id of event."""
self._restore_context()

step_type: StepType = "undefined"
if event_type == CBEventType.RETRIEVE:
step_type = "retrieval"
@@ -104,7 +104,6 @@ def on_event_end(
"""Run when an event ends."""
step = self.steps.get(event_id, None)


if payload is None or step is None:
return

@@ -117,11 +116,13 @@
source_nodes = getattr(response, "source_nodes", None)
if source_nodes:
source_refs = ", ".join(
[f"Source {idx}" for idx, _ in enumerate(source_nodes)])
[f"Source {idx}" for idx, _ in enumerate(source_nodes)]
)
step.elements = [
Text(
name=f"Source {idx}",
content=source.text or "Empty node",
display="side",
)
for idx, source in enumerate(source_nodes)
]
@@ -137,6 +138,7 @@
step.elements = [
Text(
name=f"Source {idx}",
display="side",
content=source.node.get_text() or "Empty node",
)
for idx, source in enumerate(sources)
@@ -173,7 +175,7 @@ def on_event_end(
token_count = self.total_llm_token_count or None
raw_response = response.raw if response else None
model = raw_response.get("model", None) if raw_response else None

if messages and isinstance(response, ChatResponse):
msg: ChatMessage = response.message
step.generation = ChatGeneration(
@@ -198,12 +200,11 @@
else:
step.output = payload
self.context.loop.create_task(step.update())

self.steps.pop(event_id, None)

def _noop(self, *args, **kwargs):
pass

start_trace = _noop
end_trace = _noop
