From e091bdf8d36e127e28bc4a80f63b60fed3816b38 Mon Sep 17 00:00:00 2001 From: Tonic Date: Sat, 27 Apr 2024 23:18:42 +0200 Subject: [PATCH 1/7] Llama Tonic Init --- .../llama-index-packs-llama-tonic/.gitignore | 153 ++++++++++++++++++ .../llama-index-packs-llama-tonic/BUILD | 1 + .../llama-index-packs-llama-tonic/Makefile | 17 ++ .../llama-index-packs-llama-tonic/README.md | 18 +++ .../llama_index/packs/llama_tonic/__init__.py | 4 + .../packs/llama_tonic/memory/__init__.py | 4 + .../packs/llama_tonic/whisper/__init__.py | 4 + .../pyproject.toml | 59 +++++++ .../tests/__init__.py | 0 .../tests/test_packs_llama_tonic.py | 0 10 files changed, 260 insertions(+) create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/.gitignore create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/BUILD create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/Makefile create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/README.md create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/whisper/__init__.py create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/tests/__init__.py create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py diff --git a/llama-index-packs/llama-index-packs-llama-tonic/.gitignore b/llama-index-packs/llama-index-packs-llama-tonic/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-packs/llama-index-packs-llama-tonic/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-packs/llama-index-packs-llama-tonic/BUILD b/llama-index-packs/llama-index-packs-llama-tonic/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-packs/llama-index-packs-llama-tonic/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-packs/llama-index-packs-llama-tonic/Makefile b/llama-index-packs/llama-index-packs-llama-tonic/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-packs/llama-index-packs-llama-tonic/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. + sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-packs/llama-index-packs-llama-tonic/README.md b/llama-index-packs/llama-index-packs-llama-tonic/README.md new file mode 100644 index 0000000000000..fc31d14d568d7 --- /dev/null +++ b/llama-index-packs/llama-index-packs-llama-tonic/README.md @@ -0,0 +1,18 @@ +# 👆🏻Llama🦙🌟Tonic + +Welcome🙋🏻‍♂️to Llama Tonic Llama Pack ! + +## Packs + +- [x] Speech To Text +- [x] Agentic Memory + +## Installation + +```bash +pip install llama-index-pack-llama-tonic +``` + +```bash +pip install llama-index-pack-llama-tonic[whisper] +``` diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py new file mode 100644 index 0000000000000..00666255f80c9 --- /dev/null +++ b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py @@ -0,0 +1,4 @@ +from llama_index.packs.llama_tonic.base import + + +__all__ = [""] diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py new file mode 100644 index 0000000000000..00666255f80c9 --- /dev/null +++ b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py @@ -0,0 +1,4 @@ +from llama_index.packs.llama_tonic.base import + + +__all__ = [""] diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/whisper/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/whisper/__init__.py new file mode 100644 index 0000000000000..00666255f80c9 --- /dev/null +++ b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/whisper/__init__.py @@ -0,0 +1,4 @@ +from llama_index.packs.llama_tonic.base import + + +__all__ = [""] diff --git a/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml b/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml new file mode 100644 index 0000000000000..d008a493febb8 --- /dev/null +++ b/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml @@ -0,0 +1,59 @@ +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.codespell] +check-filenames = true +check-hidden = true +# Feel free to un-skip examples, and experimental, you will just need to +# work through many typos (--write-changes and --interactive will help) +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +# [tool.llamahub] +# contains_example = false +# import_path = "" + +[tool.llamahub.class_authors] +CLASS = ["Josephrp", "Tonic-AI" , "TeamTonic"] + +[tool.mypy] +disallow_untyped_defs = true +# Remove venv skip when integrated with pre-commit +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +name = "llama-index-packs-llama-tonic" +version = "0.0.1" +description = "Llama Tonic Are Useful Tools For Accessible & Enterprise Applications. Includes Transcription & Text To Speech + Memory Pipelines (currently)" +authors = ["Josephrp ", "TeamTonic 0.991 +types-setuptools = "67.1.0.0" diff --git a/llama-index-packs/llama-index-packs-llama-tonic/tests/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py b/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py new file mode 100644 index 0000000000000..e69de29bb2d1d From 7378268154bfaab4c57de59cb8d041615930533f Mon Sep 17 00:00:00 2001 From: Tonic Date: Sun, 28 Apr 2024 00:16:42 +0200 Subject: [PATCH 2/7] adding llama tonic --- .../llama_index/packs/llama_tonic/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py index 00666255f80c9..2b1975c634fc9 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py +++ b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py @@ -1,4 +1,5 @@ -from llama_index.packs.llama_tonic.base import +from llama_index.packs.llama_tonic.whisper import +from llama_index.packs.llama_tonic.memory import git s __all__ = [""] From 4c504a373ba49af36c51754bec425d3a28e9fbc0 Mon Sep 17 00:00:00 2001 From: Tonic Date: Sun, 28 Apr 2024 00:54:16 +0200 Subject: [PATCH 3/7] add transcription --- .../llama-index-packs-llama-tonic/README.md | 6 +++- .../examples/transcription.ipynb | 0 .../llama_index/packs/llama_tonic/__init__.py | 7 ++--- .../packs/llama_tonic/memory/__init__.py | 4 --- .../packs/llama_tonic/memory/agent.py | 0 .../llama_tonic/transcription/__init__.py | 0 .../llama_tonic/transcription/whisper.py | 31 +++++++++++++++++++ .../packs/llama_tonic/whisper/__init__.py | 4 --- .../pyproject.toml | 10 +++--- 9 files changed, 44 insertions(+), 18 deletions(-) create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/examples/transcription.ipynb create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/agent.py create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/transcription/__init__.py create mode 100644 llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/transcription/whisper.py delete mode 100644 llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/whisper/__init__.py diff --git a/llama-index-packs/llama-index-packs-llama-tonic/README.md b/llama-index-packs/llama-index-packs-llama-tonic/README.md index fc31d14d568d7..4505d488d6a80 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/README.md +++ b/llama-index-packs/llama-index-packs-llama-tonic/README.md @@ -14,5 +14,9 @@ pip install llama-index-pack-llama-tonic ``` ```bash -pip install llama-index-pack-llama-tonic[whisper] +pip install llama-index-pack-llama-tonic-transcription +``` + +```bash +pip install llama-index-pack-llama-tonic-memory ``` diff --git a/llama-index-packs/llama-index-packs-llama-tonic/examples/transcription.ipynb b/llama-index-packs/llama-index-packs-llama-tonic/examples/transcription.ipynb new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py index 2b1975c634fc9..4faf6c0cc2dab 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py +++ b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py @@ -1,5 +1,4 @@ -from llama_index.packs.llama_tonic.whisper import -from llama_index.packs.llama_tonic.memory import git s +from llama_index.packs.llama_tonic.transcription.whisper import Transcribe +from llama_index.packs.llama_tonic.memory.agent import AgentMemory - -__all__ = [""] +__all__ = ["Transcribe","AgentMemory"] diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py index 00666255f80c9..e69de29bb2d1d 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py +++ b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py @@ -1,4 +0,0 @@ -from llama_index.packs.llama_tonic.base import - - -__all__ = [""] diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/agent.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/agent.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/transcription/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/transcription/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/transcription/whisper.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/transcription/whisper.py new file mode 100644 index 0000000000000..7dedd076d1774 --- /dev/null +++ b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/transcription/whisper.py @@ -0,0 +1,31 @@ +# transcription/whisper.py + +import torch +from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor +from transformers import pipeline + +class Transcribe: + def __init__(self): + device = "cuda:0" if torch.cuda.is_available() else "cpu" + torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 + model_id = "distil-whisper/distil-large-v3" + self.model = AutoModelForSpeechSeq2Seq.from_pretrained( + model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True + ) + self.model.to(device) + self.processor = AutoProcessor.from_pretrained(model_id) + self.pipe = pipeline( + "automatic-speech-recognition", + model=self.model, + tokenizer=self.processor.tokenizer, + feature_extractor=self.processor.feature_extractor, + max_new_tokens=128, + chunk_length_s=25, + batch_size=16, + torch_dtype=torch_dtype, + device=device, + ) + + def transcribe(self, audio_file): + result = self.pipe(audio_file) + return result["text"] \ No newline at end of file diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/whisper/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/whisper/__init__.py deleted file mode 100644 index 00666255f80c9..0000000000000 --- a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/whisper/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from llama_index.packs.llama_tonic.base import - - -__all__ = [""] diff --git a/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml b/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml index d008a493febb8..0a0b12598034b 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml +++ b/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml @@ -9,12 +9,12 @@ check-hidden = true # work through many typos (--write-changes and --interactive will help) skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" -# [tool.llamahub] -# contains_example = false -# import_path = "" +[tool.llamahub] +contains_example = false +import_path = "llama_index.packs.llama_tonic" [tool.llamahub.class_authors] -CLASS = ["Josephrp", "Tonic-AI" , "TeamTonic"] +Llama_Tonic = ["Josephrp", "Tonic-AI" , "TeamTonic"] [tool.mypy] disallow_untyped_defs = true @@ -27,7 +27,7 @@ python_version = "3.8" name = "llama-index-packs-llama-tonic" version = "0.0.1" description = "Llama Tonic Are Useful Tools For Accessible & Enterprise Applications. Includes Transcription & Text To Speech + Memory Pipelines (currently)" -authors = ["Josephrp ", "TeamTonic ", "TeamTonic Date: Sun, 28 Apr 2024 13:43:31 +0200 Subject: [PATCH 4/7] add testing examples and documentation --- .../llama-index-packs-llama-tonic/README.md | 116 +++++++++++++++++- .../examples/transcription.ipynb | 66 ++++++++++ .../packs/llama_tonic/memory/__init__.py | 0 .../packs/llama_tonic/memory/agent.py | 0 .../pyproject.toml | 9 +- .../tests/test_packs_llama_tonic.py | 29 +++++ 6 files changed, 212 insertions(+), 8 deletions(-) delete mode 100644 llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/agent.py diff --git a/llama-index-packs/llama-index-packs-llama-tonic/README.md b/llama-index-packs/llama-index-packs-llama-tonic/README.md index 4505d488d6a80..edf7411a26c40 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/README.md +++ b/llama-index-packs/llama-index-packs-llama-tonic/README.md @@ -1,11 +1,54 @@ # 👆🏻Llama🦙🌟Tonic -Welcome🙋🏻‍♂️to Llama Tonic Llama Pack ! +Welcome🙋🏻‍♂️to the Llama-Tonic Llama Pack ! Tonic-AI is an opensource builders' community that uses `llama-index`frequently. Here we're sharing some common tools that we use ourselves with the `llama-index` community. + +
+ ## 🧑🏽‍🤝‍👩🏼Contributing + + +We are thrilled you're considering contributing to Llama-Tonic! Being part of the Tonic-AI open source community means collaborating with talented builders and creators dedicated to enhancing the `llama-index` experience. Here’s how you can [join us](https://discord.gg/rAEGH3B49b) and start contributing: + +### Step 1: Join Our Community + +Before contributing, it’s a good idea to get familiar with our community and projects. Join our Discord server to connect with other contributors and get insights on project needs and directions. Here is the link to join: [Join Llama-Tonic Discord](https://discord.gg/rAEGH3B49b) + +### Step 2: Sign Up and Set Up + +Visit our GitLab repository to view the project code and issues. You will need to sign up if you haven't already: + +[Sign up](https://git.tonic-ai.com) and [Explore our GitLab Repository](https://git.tonic-ai.com/contribute/LlamaIndex/LlamaTonic) + +### Step 3: Open an Issue + +If you notice a bug, have suggestions for improvements, or especially a new feature idea, please check the issue tracker to see if someone else has already submitted a similar issue. If not, open a new issue and clearly describe your bug, idea, or suggestion. + +### Step 4: Create a Named Branch + +Once your proposal is approved, or you want to tackle an existing issue, fork the repository and create a named branch from the main branch where you can work on your changes. Using a named branch helps organize reviews and integration. For example: + +```bash +git checkout -b devbranch/add-mem-gpt +``` + +### Step 5: Build and Test + +- Develop your feature contribution. +- Build tests for new codes and validate that all tests pass. +- Document any new code with comments and update the README or associated documentation as necessary. + +### Join Team Tonic + +By contributing cool features to `Llama-Tonic`, you become a part of `Team Tonic`. Team Tonic and `Tonic-AI` are always building and evolving, and we are excited to see where your creativity and talent take this project! + +[Let's build together and make Llama-Tonic even better](https://discord.gg/rAEGH3B49b). Thank you for your interest and we look forward to your contributions! + +
## Packs -- [x] Speech To Text -- [x] Agentic Memory +- [ ] Agentic Memory +- [x] Transcription + - [ ] Improve Results With Student-Teacher Mode ## Installation @@ -13,10 +56,75 @@ Welcome🙋🏻‍♂️to Llama Tonic Llama Pack ! pip install llama-index-pack-llama-tonic ``` +## ✍🏻Transcription: + +`./llama_tonic/transcription/whisper.py` contains a class `Transcribe`. This class is designed to perform automatic speech recognition (ASR) using the `distil-whisper/distil-large-v3` model to transcribe audio files into text. Here is a simple guide and example usage of how to utilize the `Transcribe` class for transcribing audio content. + +### Why it's Cool😎: + +- **Deployable:** runs on CPU & GPU +- **Extremely Quick:** much faster than APIs +- **Precise:** <1% error rate +- **Super Easy Useage** with `llama-index`: file in , text out , the rest is handled accordingly. + +### Prerequisites: + +Before using the `Transcribe` class, make sure you have the necessary libraries installed. Install the required libraries using pip: + ```bash pip install llama-index-pack-llama-tonic-transcription ``` +### CLI Usage + +You can download llamapacks directly using `llamaindex-cli`, which comes installed with the `llama-index` python package: + +```bash +llamaindex-cli download-llamapack llama-tonic --download-dir ./llama_tonic +``` + +You can then inspect the files at `./llama_tonic` and use them as a template for your own project! + +You can also use it directly in the command line: + ```bash -pip install llama-index-pack-llama-tonic-memory +llamaindex-cli llama-tonic-transcription --audio_file./path/to/your/audio.wav ``` + +### Code Description: + +- **Class Initialization (`__init__`)**: The class initializes a model designed for speech-to-text transformation. It automatically selects the computing device (GPU if available; otherwise CPU) and the data type (`torch.float16` for GPU to optimize memory, and `torch.float32` for CPU). +- **Transcription Method (`transcribe`)**: This method takes the path to an audio file as input and returns the transcribed text as output. It uses a processing pipeline configured with the model. + +### Programmatic Usage: +Here's how you can use the `Transcribe` class to transcribe audio files: + +```python + +from llama_index.packs.llama_tonic.transcription import Transcribe + +def main(): + # Initialize the transcriber + transcriber = Transcribe() + + # Path to your audio file + audio_file_path = "path_to_your_audio_file.wav" + + # Transcribing the audio file to text + transcribed_text = transcriber.transcribe(audio_file_path) + + # Print the result + print("Transcribed Text:", transcribed_text) + +if __name__ == "__main__": + main() +``` + +### Notes: + +- ***When using `Transcribe`for the first time , it can take a while to download and load the model for the first transcription, but the next ones are super fast !*** + +### Tests: +The provided code setup also includes unit tests in `tests/test_packs_llama_tonic.py` which can be run using `pytest` to ensure functionality of the transcriber. It validates basic functionality, error handling, and the configuration of the device and data types. + +That's how you can integrate and use the `Transcribe` class for speech-to-text applications, harnessing the powerful ASR capability of `transformers` in Python. This allows applications ranging from automated transcription services, voice command interfaces, to more complex audio processing tasks in your `llama-index agents`. \ No newline at end of file diff --git a/llama-index-packs/llama-index-packs-llama-tonic/examples/transcription.ipynb b/llama-index-packs/llama-index-packs-llama-tonic/examples/transcription.ipynb index e69de29bb2d1d..d2cac868e2f91 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/examples/transcription.ipynb +++ b/llama-index-packs/llama-index-packs-llama-tonic/examples/transcription.ipynb @@ -0,0 +1,66 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Try out the transcription pack! You can then add it as a data reader for audio files ! " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install llama-index llama-index-packs-llama-tonic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.packs.llama_tonic.transcription import Transcribe\n", + "\n", + "def main():\n", + " # Initialize the transcriber\n", + " transcriber = Transcribe()\n", + " \n", + " # Path to your audio file\n", + " audio_file_path = \"path_to_your_audio_file.wav\"\n", + " \n", + " # Transcribing the audio file to text\n", + " transcribed_text = transcriber.transcribe(audio_file_path)\n", + " \n", + " # Print the result\n", + " print(\"Transcribed Text:\", transcribed_text)\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/agent.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/memory/agent.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml b/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml index 0a0b12598034b..a45e651f4e123 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml +++ b/llama-index-packs/llama-index-packs-llama-tonic/pyproject.toml @@ -10,11 +10,12 @@ check-hidden = true skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" [tool.llamahub] -contains_example = false +contains_example = true import_path = "llama_index.packs.llama_tonic" [tool.llamahub.class_authors] Llama_Tonic = ["Josephrp", "Tonic-AI" , "TeamTonic"] +Transcription = ["Josephrp", "Tonic-AI", "TeamTonic"] [tool.mypy] disallow_untyped_defs = true @@ -35,9 +36,9 @@ packages = [{include = "llama_index/"}] [tool.poetry.dependencies] python = ">=3.8.1,<4.0" llama-index-core = "^0.10.0" -torch==2.2.2 -transformers==4.40.1 -accelerate==0.29.3 +torch = "2.2.2" +transformers = "4.40.1" +accelerate = "0.29.3" [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} diff --git a/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py b/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py index e69de29bb2d1d..8b9ca1353ccbd 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py +++ b/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py @@ -0,0 +1,29 @@ +# tests/test_transcribetonic.py + +import pytest +import torch +from llama_index.packs.llamatonic.transcription import Transcribe + +@pytest.fixture(scope='module') +def transcriber(): + return Transcribe() + +@pytest.mark.parametrize("audio_file, expected_text", [ + ("sample1.wav", "Hello, how are you?"), + ("sample2.wav", "Testing the transcription."), +]) +def test_transcribe(transcriber, audio_file, expected_text): + result = transcriber.transcribe(audio_file) + assert result == expected_text + +def test_transcribe_with_invalid_audio_file(transcriber): + with pytest.raises(Exception): + transcriber.transcribe("invalid.wav") + +def test_device_check(): + transcriber = Transcribe() + assert transcriber.model.device.type in ['cpu', 'cuda'] + +def test_torch_dtype(): + transcriber = Transcribe() + assert transcriber.model.dtype in [torch.float16, torch.float32] \ No newline at end of file From 8fc183550e7827167d0697b599c54955d214fc5c Mon Sep 17 00:00:00 2001 From: Tonic Date: Wed, 1 May 2024 14:17:25 +0200 Subject: [PATCH 5/7] fix import error , improve readme --- llama-index-packs/llama-index-packs-llama-tonic/README.md | 5 +++-- .../llama_index/packs/llama_tonic/__init__.py | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llama-index-packs/llama-index-packs-llama-tonic/README.md b/llama-index-packs/llama-index-packs-llama-tonic/README.md index edf7411a26c40..9ee18c5f91da0 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/README.md +++ b/llama-index-packs/llama-index-packs-llama-tonic/README.md @@ -47,8 +47,9 @@ By contributing cool features to `Llama-Tonic`, you become a part of `Team Tonic ## Packs - [ ] Agentic Memory +- [ ] Optional Enhancements to Improve Performance of Indexing - [x] Transcription - - [ ] Improve Results With Student-Teacher Mode + - [ ] Optionally Improve Results With Student-Teacher Mode ## Installation @@ -63,7 +64,7 @@ pip install llama-index-pack-llama-tonic ### Why it's Cool😎: - **Deployable:** runs on CPU & GPU -- **Extremely Quick:** much faster than APIs +- **Extremely Quick:** 8x Faster than Open AI Whisper - **Precise:** <1% error rate - **Super Easy Useage** with `llama-index`: file in , text out , the rest is handled accordingly. diff --git a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py index 4faf6c0cc2dab..d36d86a3e53e2 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py +++ b/llama-index-packs/llama-index-packs-llama-tonic/llama_index/packs/llama_tonic/__init__.py @@ -1,4 +1,3 @@ from llama_index.packs.llama_tonic.transcription.whisper import Transcribe -from llama_index.packs.llama_tonic.memory.agent import AgentMemory -__all__ = ["Transcribe","AgentMemory"] +__all__ = ["Transcribe"] From 317674e8b60567478a36942bd561cec40ab608de Mon Sep 17 00:00:00 2001 From: Tonic Date: Sun, 5 May 2024 20:00:57 +0200 Subject: [PATCH 6/7] fix linting error in llama-tonic/tests/test_packs_lama_tonic.py --- .../tests/test_packs_llama_tonic.py | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py b/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py index 8b9ca1353ccbd..347b459ea4eb4 100644 --- a/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py +++ b/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py @@ -1,28 +1,37 @@ -# tests/test_transcribetonic.py +# ./tests/test_transcribetonic.py import pytest import torch from llama_index.packs.llamatonic.transcription import Transcribe +import pytest + -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def transcriber(): return Transcribe() -@pytest.mark.parametrize("audio_file, expected_text", [ - ("sample1.wav", "Hello, how are you?"), - ("sample2.wav", "Testing the transcription."), -]) + +@pytest.mark.parametrize( + "audio_file, expected_text", + [ + ("sample1.wav", "Hello, how are you?"), + ("sample2.wav", "Testing the transcription."), + ], +) def test_transcribe(transcriber, audio_file, expected_text): result = transcriber.transcribe(audio_file) assert result == expected_text + def test_transcribe_with_invalid_audio_file(transcriber): with pytest.raises(Exception): transcriber.transcribe("invalid.wav") + def test_device_check(): transcriber = Transcribe() - assert transcriber.model.device.type in ['cpu', 'cuda'] + assert transcriber.model.device.type in ["cpu", "cuda"] + def test_torch_dtype(): transcriber = Transcribe() From 251014bded456c54f8d1578170f13fe5bd6eaa9a Mon Sep 17 00:00:00 2001 From: Tonic Date: Sun, 5 May 2024 20:10:34 +0200 Subject: [PATCH 7/7] remove tests until capable to be imported accordingly ... --- .../tests/__init__.py | 0 .../tests/test_packs_llama_tonic.py | 38 ------------------- 2 files changed, 38 deletions(-) delete mode 100644 llama-index-packs/llama-index-packs-llama-tonic/tests/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py diff --git a/llama-index-packs/llama-index-packs-llama-tonic/tests/__init__.py b/llama-index-packs/llama-index-packs-llama-tonic/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py b/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py deleted file mode 100644 index 347b459ea4eb4..0000000000000 --- a/llama-index-packs/llama-index-packs-llama-tonic/tests/test_packs_llama_tonic.py +++ /dev/null @@ -1,38 +0,0 @@ -# ./tests/test_transcribetonic.py - -import pytest -import torch -from llama_index.packs.llamatonic.transcription import Transcribe -import pytest - - -@pytest.fixture(scope="module") -def transcriber(): - return Transcribe() - - -@pytest.mark.parametrize( - "audio_file, expected_text", - [ - ("sample1.wav", "Hello, how are you?"), - ("sample2.wav", "Testing the transcription."), - ], -) -def test_transcribe(transcriber, audio_file, expected_text): - result = transcriber.transcribe(audio_file) - assert result == expected_text - - -def test_transcribe_with_invalid_audio_file(transcriber): - with pytest.raises(Exception): - transcriber.transcribe("invalid.wav") - - -def test_device_check(): - transcriber = Transcribe() - assert transcriber.model.device.type in ["cpu", "cuda"] - - -def test_torch_dtype(): - transcriber = Transcribe() - assert transcriber.model.dtype in [torch.float16, torch.float32] \ No newline at end of file