feat(code quality): add black and ruff #56

Open · wants to merge 7 commits into `main` · Changes from 4 commits

4 changes: 3 additions & 1 deletion .gitignore

```diff
@@ -1,4 +1,6 @@
 __pycache__/
 *.wav
 keys.py
-.venv/
+.venv/
+.DS_Store
+ecout_env
```

44 changes: 30 additions & 14 deletions AudioRecorder.py

```diff
@@ -1,11 +1,14 @@
-import custom_speech_recognition as sr
-import pyaudiowpatch as pyaudio
 from datetime import datetime
+
+import pyaudiowpatch as pyaudio
+
+import custom_speech_recognition as sr
 
 RECORD_TIMEOUT = 3
 ENERGY_THRESHOLD = 1000
 DYNAMIC_ENERGY_THRESHOLD = False
 
+
 class BaseRecorder:
     def __init__(self, source, source_name):
         self.recorder = sr.Recognizer()
@@ -21,35 +24,48 @@ def adjust_for_noise(self, device_name, msg):
         print(f"[INFO] Completed ambient noise adjustment for {device_name}.")
 
     def record_into_queue(self, audio_queue):
-        def record_callback(_, audio:sr.AudioData) -> None:
+        def record_callback(_, audio: sr.AudioData) -> None:
             data = audio.get_raw_data()
             audio_queue.put((self.source_name, data, datetime.utcnow()))
 
-        self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=RECORD_TIMEOUT)
+        self.recorder.listen_in_background(
+            self.source, record_callback, phrase_time_limit=RECORD_TIMEOUT
+        )
+
 
 class DefaultMicRecorder(BaseRecorder):
     def __init__(self):
         super().__init__(source=sr.Microphone(sample_rate=16000), source_name="You")
-        self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")
+        self.adjust_for_noise(
+            "Default Mic", "Please make some noise from the Default Mic..."
+        )
+
 
 class DefaultSpeakerRecorder(BaseRecorder):
     def __init__(self):
         with pyaudio.PyAudio() as p:
             wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
-            default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
+            default_speakers = p.get_device_info_by_index(
+                wasapi_info["defaultOutputDevice"]
+            )
 
             if not default_speakers["isLoopbackDevice"]:
                 for loopback in p.get_loopback_device_info_generator():
                     if default_speakers["name"] in loopback["name"]:
                         default_speakers = loopback
                         break
                 else:
                     print("[ERROR] No loopback device found.")
 
-        source = sr.Microphone(speaker=True,
-                               device_index= default_speakers["index"],
-                               sample_rate=int(default_speakers["defaultSampleRate"]),
-                               chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
-                               channels=default_speakers["maxInputChannels"])
-
+        source = sr.Microphone(
+            speaker=True,
+            device_index=default_speakers["index"],
+            sample_rate=int(default_speakers["defaultSampleRate"]),
+            chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
+            channels=default_speakers["maxInputChannels"],
+        )
         super().__init__(source=source, source_name="Speaker")
-        self.adjust_for_noise("Default Speaker", "Please make or play some noise from the Default Speaker...")
+        self.adjust_for_noise(
+            "Default Speaker",
+            "Please make or play some noise from the Default Speaker...",
+        )
```

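Since the diff shows only the recorder classes, here is a small hypothetical usage sketch of how a caller wires them to the shared queue; the `(source_name, data, timestamp)` tuple shape comes from `record_callback` above, while the wiring itself is assumed rather than taken from this PR:

```python
import queue

audio_queue = queue.Queue()

mic = DefaultMicRecorder()          # runs ambient-noise adjustment on init
mic.record_into_queue(audio_queue)  # non-blocking; captures in a background thread

# Each queue item is (source_name, raw_audio_bytes, utc_timestamp).
who, data, when = audio_queue.get()  # blocks until the first chunk arrives
print(who)  # "You"
```
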
66 changes: 39 additions & 27 deletions AudioTranscriber.py

```diff
@@ -1,19 +1,20 @@
-import whisper
-import torch
-import wave
+import io
 import os
 import threading
-from tempfile import NamedTemporaryFile
-import custom_speech_recognition as sr
-import io
+import wave
 from datetime import timedelta
-import pyaudiowpatch as pyaudio
 from heapq import merge
+from tempfile import NamedTemporaryFile
+
+import pyaudiowpatch as pyaudio
+
+import custom_speech_recognition as sr
 
 PHRASE_TIMEOUT = 3.05
 
 MAX_PHRASES = 10
 
+
 class AudioTranscriber:
     def __init__(self, mic_source, speaker_source, model):
         self.transcript_data = {"You": [], "Speaker": []}
@@ -24,20 +25,20 @@ def __init__(self, mic_source, speaker_source, model):
                 "sample_rate": mic_source.SAMPLE_RATE,
                 "sample_width": mic_source.SAMPLE_WIDTH,
                 "channels": mic_source.channels,
-                "last_sample": bytes(),
+                "last_sample": b"",
                 "last_spoken": None,
                 "new_phrase": True,
-                "process_data_func": self.process_mic_data
+                "process_data_func": self.process_mic_data,
             },
             "Speaker": {
                 "sample_rate": speaker_source.SAMPLE_RATE,
                 "sample_width": speaker_source.SAMPLE_WIDTH,
                 "channels": speaker_source.channels,
-                "last_sample": bytes(),
+                "last_sample": b"",
                 "last_spoken": None,
                 "new_phrase": True,
-                "process_data_func": self.process_speaker_data
-            }
+                "process_data_func": self.process_speaker_data,
+            },
         }
 
     def transcribe_audio_queue(self, audio_queue):
@@ -46,7 +47,7 @@ def transcribe_audio_queue(self, audio_queue):
             self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
             source_info = self.audio_sources[who_spoke]
 
-            text = ''
+            text = ""
             temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
             temp_file.close()
@@ -55,29 +56,35 @@ def transcribe_audio_queue(self, audio_queue):
 
             os.unlink(temp_file.name)
 
-            if text != '' and text.lower() != 'you':
+            if text != "" and text.lower() != "you":
                 self.update_transcript(who_spoke, text, time_spoken)
                 self.transcript_changed_event.set()
 
     def update_last_sample_and_phrase_status(self, who_spoke, data, time_spoken):
         source_info = self.audio_sources[who_spoke]
-        if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=PHRASE_TIMEOUT):
-            source_info["last_sample"] = bytes()
+        if source_info["last_spoken"] and time_spoken - source_info[
+            "last_spoken"
+        ] > timedelta(seconds=PHRASE_TIMEOUT):
+            source_info["last_sample"] = b""
             source_info["new_phrase"] = True
         else:
             source_info["new_phrase"] = False
 
         source_info["last_sample"] += data
-        source_info["last_spoken"] = time_spoken 
+        source_info["last_spoken"] = time_spoken
 
     def process_mic_data(self, data, temp_file_name):
-        audio_data = sr.AudioData(data, self.audio_sources["You"]["sample_rate"], self.audio_sources["You"]["sample_width"])
+        audio_data = sr.AudioData(
+            data,
+            self.audio_sources["You"]["sample_rate"],
+            self.audio_sources["You"]["sample_width"],
+        )
         wav_data = io.BytesIO(audio_data.get_wav_data())
-        with open(temp_file_name, 'w+b') as f:
+        with open(temp_file_name, "w+b") as f:
             f.write(wav_data.read())
 
     def process_speaker_data(self, data, temp_file_name):
-        with wave.open(temp_file_name, 'wb') as wf:
+        with wave.open(temp_file_name, "wb") as wf:
             wf.setnchannels(self.audio_sources["Speaker"]["channels"])
             p = pyaudio.PyAudio()
             wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
```

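One readability note on the `update_last_sample_and_phrase_status` hunk above: black now has to split `source_info["last_spoken"]` across three lines to respect the line-length limit. A hypothetical follow-up (not part of this PR) that keeps the condition on one line is to bind the timestamp to a local first:

```python
last_spoken = source_info["last_spoken"]
if last_spoken and time_spoken - last_spoken > timedelta(seconds=PHRASE_TIMEOUT):
    source_info["last_sample"] = b""
    source_info["new_phrase"] = True
else:
    source_info["new_phrase"] = False
```
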
```diff
@@ -96,18 +103,23 @@ def update_transcript(self, who_spoke, text, time_spoken):
             transcript[0] = (f"{who_spoke}: [{text}]\n\n", time_spoken)
 
     def get_transcript(self):
-        combined_transcript = list(merge(
-            self.transcript_data["You"], self.transcript_data["Speaker"],
-            key=lambda x: x[1], reverse=True))
+        combined_transcript = list(
+            merge(
+                self.transcript_data["You"],
+                self.transcript_data["Speaker"],
+                key=lambda x: x[1],
+                reverse=True,
+            )
+        )
         combined_transcript = combined_transcript[:MAX_PHRASES]
         return "".join([t[0] for t in combined_transcript])
 
     def clear_transcript_data(self):
         self.transcript_data["You"].clear()
         self.transcript_data["Speaker"].clear()
 
-        self.audio_sources["You"]["last_sample"] = bytes()
-        self.audio_sources["Speaker"]["last_sample"] = bytes()
+        self.audio_sources["You"]["last_sample"] = b""
+        self.audio_sources["Speaker"]["last_sample"] = b""
 
         self.audio_sources["You"]["new_phrase"] = True
-        self.audio_sources["Speaker"]["new_phrase"] = True
+        self.audio_sources["Speaker"]["new_phrase"] = True
```

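For readers skimming `get_transcript`: `heapq.merge` does not sort its inputs; it lazily interleaves lists that are already sorted in the same order (here descending by timestamp, hence `reverse=True`, since `update_transcript` inserts the newest phrase at index 0). A small standalone illustration with made-up phrases:

```python
from heapq import merge

# Both inputs are already sorted newest-first (descending timestamps).
you = [("You: [see you]\n\n", 3), ("You: [hi]\n\n", 1)]
speaker = [("Speaker: [hello]\n\n", 2)]

combined = list(merge(you, speaker, key=lambda x: x[1], reverse=True))
# Timestamps come out as 3, 2, 1: the newest phrase across both sources first.
```
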
38 changes: 22 additions & 16 deletions GPTResponder.py

```diff
@@ -1,26 +1,30 @@
+import time
+
 import openai
 from keys import OPENAI_API_KEY
-from prompts import create_prompt, INITIAL_RESPONSE
-import time
+from prompts import INITIAL_RESPONSE, create_prompt
 
 openai.api_key = OPENAI_API_KEY
 
+
 def generate_response_from_transcript(transcript):
     try:
         response = openai.ChatCompletion.create(
-                model="gpt-3.5-turbo-0301",
-                messages=[{"role": "system", "content": create_prompt(transcript)}],
-                temperature = 0.0
+            model="gpt-3.5-turbo-0301",
+            messages=[{"role": "system", "content": create_prompt(transcript)}],
+            temperature=0.0,
         )
     except Exception as e:
         print(e)
-        return ''
+        return ""
     full_response = response.choices[0].message.content
     try:
-        return full_response.split('[')[1].split(']')[0]
-    except:
-        return ''
+        return full_response.split("[")[1].split("]")[0]
+    except IndexError:
+        return ""
+
 
 class GPTResponder:
     def __init__(self):
         self.response = INITIAL_RESPONSE
```

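One fix above is worth calling out: the bare `except:` becomes `except IndexError:` (a bare except would also swallow `SystemExit` and `KeyboardInterrupt`), while the bracket-parsing behavior is unchanged. A standalone sketch of that behavior, using a hypothetical helper name:

```python
def extract_bracketed(full_response: str) -> str:
    """Return the first [bracketed] span, or "" when no "[" is present."""
    try:
        return full_response.split("[")[1].split("]")[0]
    except IndexError:  # raised when the response contains no "["
        return ""

assert extract_bracketed("Sure. [Sounds good to me]") == "Sounds good to me"
assert extract_bracketed("no brackets here") == ""
```
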
```diff
@@ -31,14 +35,16 @@ def respond_to_transcriber(self, transcriber):
             if transcriber.transcript_changed_event.is_set():
                 start_time = time.time()
 
-                transcriber.transcript_changed_event.clear() 
+                transcriber.transcript_changed_event.clear()
                 transcript_string = transcriber.get_transcript()
                 response = generate_response_from_transcript(transcript_string)
 
                 end_time = time.time()  # Measure end time
-                execution_time = end_time - start_time  # Calculate the time it took to execute the function
-
-                if response != '':
+
+                # Calculate the time it took to execute the function
+                execution_time = end_time - start_time
+
+                if response != "":
                     self.response = response
 
                 remaining_time = self.response_interval - execution_time
@@ -48,4 +54,4 @@
             time.sleep(0.3)
 
     def update_response_interval(self, interval):
-        self.response_interval = interval
+        self.response_interval = interval
```

21 changes: 21 additions & 0 deletions Makefile

```makefile
lint:
	@echo
	ruff .
	@echo
	black --check --diff --color .
	@echo
	pip-audit

format:
	ruff --silent --exit-zero --fix .
	black .

precommit:
	make lint
```

**Owner:** Should probably have instructions to install `make` as well; Chocolatey seems easiest.

**Contributor (author):** I believe it's built in on macOS and Linux, not sure about Windows.

**Owner:** On Windows you have to install it. Installing Chocolatey with the command in the README and using `choco install make` would be good enough for instructions.

```makefile
	make format

venv:
	python3 -m venv ecout_env
```

**Owner:** python or python3?

**Contributor (author):** I think python3 is preferred since we are working with Python 3.x.x.

**Owner:** Could we somehow use either? For me, I don't have a python3 alias, only python in my PATH.

**Contributor (author):** Yeah, I can change it to python instead of python3.

**Contributor (author):** Done.

```makefile
install:
	pip install -r requirements.txt
```

**Contributor (author):** These are the streamlined installation changes; let me know if it is necessary to split the PR.

26 changes: 22 additions & 4 deletions README.md

````diff
@@ -5,7 +5,7 @@ Ecoute is a live transcription tool that provides real-time transcripts for both
 
 ## 📖 Demo
 
-https://github.com/SevaSk/ecoute/assets/50382291/8ac48927-8a26-49fd-80e9-48f980986208
+<https://github.com/SevaSk/ecoute/assets/50382291/8ac48927-8a26-49fd-80e9-48f980986208>
 
 Ecoute is designed to help users in their conversations by providing live transcriptions and generating contextually relevant responses. By leveraging the power of OpenAI's GPT-3.5, Ecoute aims to make communication more efficient and enjoyable.
 
@@ -18,18 +18,22 @@ Follow these steps to set up and run Ecoute on your local machine.
 - Python >=3.8.0
 - An OpenAI API key
 - Windows OS (Not tested on others)
-- FFmpeg 
+- FFmpeg
 
 If FFmpeg is not installed in your system, you can follow the steps below to install it.
 
 First, you need to install Chocolatey, a package manager for Windows. Open your PowerShell as Administrator and run the following command:
+
 ```
 Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
 ```
+
 Once Chocolatey is installed, you can install FFmpeg by running the following command in your PowerShell:
+
 ```
 choco install ffmpeg-full
 ```
+
 Please ensure that you run these commands in a PowerShell window with administrator privileges. If you face any issues during the installation, you can visit the official Chocolatey and FFmpeg websites for troubleshooting.
@@ -51,7 +55,7 @@ Please ensure that you run these commands in a PowerShell window with administrator privileges.
    ```
    pip install -r requirements.txt
    ```
-   
+
 4. Create a `keys.py` file in the ecoute directory and add your OpenAI API key:
 
    - Option 1: You can utilize a command on your command prompt. Run the following command, ensuring to replace "API KEY" with your actual OpenAI API key:
@@ -61,10 +65,11 @@ Please ensure that you run these commands in a PowerShell window with administrator privileges.
      ```
 
    - Option 2: You can create the keys.py file manually. Open up your text editor of choice and enter the following content:
+
      ```
     OPENAI_API_KEY="API KEY"
      ```
 
      Replace "API KEY" with your actual OpenAI API key. Save this file as keys.py within the ecoute directory.
````

```diff
@@ -102,3 +107,16 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 ## 🤝 Contributing
 
 Contributions are welcome! Feel free to open issues or submit pull requests to improve Ecoute.
+
+### Installation
+
+1. `make venv`
+2. Activate the venv: `ecout_venv`
```

**Owner:** `ecout_env\Scripts\activate`?

**Contributor (author):** I see, I'll add one script for macOS and one for Windows.

**Contributor (author):** Ah, sorry. I tried adding a script, but it's not possible to activate the venv with a make command. I'll add both options to the README; for macOS it's `source ecout_env/bin/activate`.
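Collecting the two activation commands quoted in this thread into one place (both paths assume the `ecout_env` virtual environment that `make venv` creates):

```
# Windows
ecout_env\Scripts\activate

# macOS / Linux
source ecout_env/bin/activate
```
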

```diff
+3. `make install`
+
+### Code quality
+
+Before submitting a pull request run `make precommit` and resolve any issues. Additionally, here are some useful commands:
+
+- `make lint`
+- `make format`
```

**Contributor (author):** These are the streamlined installation changes; let me know if it is necessary to split the PR.

**Owner:** Oh, sorry! Got confused with the Installation section above.