feat(code quality): add black and ruff #56

Open · wants to merge 7 commits into `main` · Changes from 4 commits

4 changes: 3 additions & 1 deletion .gitignore

```diff
@@ -1,4 +1,6 @@
 __pycache__/
 *.wav
 keys.py
-.venv/
+.venv/
+.DS_Store
+ecout_env
```

44 changes: 30 additions & 14 deletions AudioRecorder.py

```diff
@@ -1,11 +1,14 @@
-import custom_speech_recognition as sr
-import pyaudiowpatch as pyaudio
 from datetime import datetime
+
+import pyaudiowpatch as pyaudio
+
+import custom_speech_recognition as sr
 
 RECORD_TIMEOUT = 3
 ENERGY_THRESHOLD = 1000
 DYNAMIC_ENERGY_THRESHOLD = False
 
+
 class BaseRecorder:
     def __init__(self, source, source_name):
         self.recorder = sr.Recognizer()
@@ -21,35 +24,48 @@ def adjust_for_noise(self, device_name, msg):
         print(f"[INFO] Completed ambient noise adjustment for {device_name}.")
 
     def record_into_queue(self, audio_queue):
-        def record_callback(_, audio:sr.AudioData) -> None:
+        def record_callback(_, audio: sr.AudioData) -> None:
             data = audio.get_raw_data()
             audio_queue.put((self.source_name, data, datetime.utcnow()))
 
-        self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=RECORD_TIMEOUT)
+        self.recorder.listen_in_background(
+            self.source, record_callback, phrase_time_limit=RECORD_TIMEOUT
+        )
+
 
 class DefaultMicRecorder(BaseRecorder):
     def __init__(self):
         super().__init__(source=sr.Microphone(sample_rate=16000), source_name="You")
-        self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")
+        self.adjust_for_noise(
+            "Default Mic", "Please make some noise from the Default Mic..."
+        )
+
 
 class DefaultSpeakerRecorder(BaseRecorder):
     def __init__(self):
         with pyaudio.PyAudio() as p:
             wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
-            default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
+            default_speakers = p.get_device_info_by_index(
+                wasapi_info["defaultOutputDevice"]
+            )
 
             if not default_speakers["isLoopbackDevice"]:
                 for loopback in p.get_loopback_device_info_generator():
                     if default_speakers["name"] in loopback["name"]:
                         default_speakers = loopback
                         break
                 else:
                     print("[ERROR] No loopback device found.")
 
-        source = sr.Microphone(speaker=True,
-                               device_index= default_speakers["index"],
-                               sample_rate=int(default_speakers["defaultSampleRate"]),
-                               chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
-                               channels=default_speakers["maxInputChannels"])
-
+        source = sr.Microphone(
+            speaker=True,
+            device_index=default_speakers["index"],
+            sample_rate=int(default_speakers["defaultSampleRate"]),
+            chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
+            channels=default_speakers["maxInputChannels"],
+        )
         super().__init__(source=source, source_name="Speaker")
-        self.adjust_for_noise("Default Speaker", "Please make or play some noise from the Default Speaker...")
+        self.adjust_for_noise(
+            "Default Speaker",
+            "Please make or play some noise from the Default Speaker...",
+        )
```

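Since the diff shows only the recorder classes, here is a small hypothetical usage sketch of how a caller wires them to the shared queue; the `(source_name, data, timestamp)` tuple shape comes from `record_callback` above, while the wiring itself is assumed rather than taken from this PR:

```python
import queue

audio_queue = queue.Queue()

mic = DefaultMicRecorder()          # runs ambient-noise adjustment on init
mic.record_into_queue(audio_queue)  # non-blocking; captures in a background thread

# Each queue item is (source_name, raw_audio_bytes, utc_timestamp).
who, data, when = audio_queue.get()  # blocks until the first chunk arrives
print(who)  # "You"
```
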
66 changes: 39 additions & 27 deletions AudioTranscriber.py

```diff
@@ -1,19 +1,20 @@
-import whisper
-import torch
-import wave
+import io
 import os
 import threading
-from tempfile import NamedTemporaryFile
-import custom_speech_recognition as sr
-import io
+import wave
 from datetime import timedelta
-import pyaudiowpatch as pyaudio
 from heapq import merge
+from tempfile import NamedTemporaryFile
+
+import pyaudiowpatch as pyaudio
+
+import custom_speech_recognition as sr
 
 PHRASE_TIMEOUT = 3.05
 
 MAX_PHRASES = 10
 
+
 class AudioTranscriber:
     def __init__(self, mic_source, speaker_source, model):
         self.transcript_data = {"You": [], "Speaker": []}
@@ -24,20 +25,20 @@ def __init__(self, mic_source, speaker_source, model):
                 "sample_rate": mic_source.SAMPLE_RATE,
                 "sample_width": mic_source.SAMPLE_WIDTH,
                 "channels": mic_source.channels,
-                "last_sample": bytes(),
+                "last_sample": b"",
                 "last_spoken": None,
                 "new_phrase": True,
-                "process_data_func": self.process_mic_data
+                "process_data_func": self.process_mic_data,
             },
             "Speaker": {
                 "sample_rate": speaker_source.SAMPLE_RATE,
                 "sample_width": speaker_source.SAMPLE_WIDTH,
                 "channels": speaker_source.channels,
-                "last_sample": bytes(),
+                "last_sample": b"",
                 "last_spoken": None,
                 "new_phrase": True,
-                "process_data_func": self.process_speaker_data
-            }
+                "process_data_func": self.process_speaker_data,
+            },
         }
 
     def transcribe_audio_queue(self, audio_queue):
@@ -46,7 +47,7 @@ def transcribe_audio_queue(self, audio_queue):
             self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
             source_info = self.audio_sources[who_spoke]
 
-            text = ''
+            text = ""
             temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
             temp_file.close()
@@ -55,29 +56,35 @@ def transcribe_audio_queue(self, audio_queue):
 
             os.unlink(temp_file.name)
 
-            if text != '' and text.lower() != 'you':
+            if text != "" and text.lower() != "you":
                 self.update_transcript(who_spoke, text, time_spoken)
                 self.transcript_changed_event.set()
 
     def update_last_sample_and_phrase_status(self, who_spoke, data, time_spoken):
         source_info = self.audio_sources[who_spoke]
-        if source_info["last_spoken"] and time_spoken - source_info["last_spoken"] > timedelta(seconds=PHRASE_TIMEOUT):
-            source_info["last_sample"] = bytes()
+        if source_info["last_spoken"] and time_spoken - source_info[
+            "last_spoken"
+        ] > timedelta(seconds=PHRASE_TIMEOUT):
+            source_info["last_sample"] = b""
             source_info["new_phrase"] = True
         else:
             source_info["new_phrase"] = False
 
         source_info["last_sample"] += data
-        source_info["last_spoken"] = time_spoken 
+        source_info["last_spoken"] = time_spoken
 
     def process_mic_data(self, data, temp_file_name):
-        audio_data = sr.AudioData(data, self.audio_sources["You"]["sample_rate"], self.audio_sources["You"]["sample_width"])
+        audio_data = sr.AudioData(
+            data,
+            self.audio_sources["You"]["sample_rate"],
+            self.audio_sources["You"]["sample_width"],
+        )
         wav_data = io.BytesIO(audio_data.get_wav_data())
-        with open(temp_file_name, 'w+b') as f:
+        with open(temp_file_name, "w+b") as f:
             f.write(wav_data.read())
 
     def process_speaker_data(self, data, temp_file_name):
-        with wave.open(temp_file_name, 'wb') as wf:
+        with wave.open(temp_file_name, "wb") as wf:
             wf.setnchannels(self.audio_sources["Speaker"]["channels"])
             p = pyaudio.PyAudio()
             wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
```

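One readability note on the `update_last_sample_and_phrase_status` hunk above: black now has to split `source_info["last_spoken"]` across three lines to respect the line-length limit. A hypothetical follow-up (not part of this PR) that keeps the condition on one line is to bind the timestamp to a local first:

```python
last_spoken = source_info["last_spoken"]
if last_spoken and time_spoken - last_spoken > timedelta(seconds=PHRASE_TIMEOUT):
    source_info["last_sample"] = b""
    source_info["new_phrase"] = True
else:
    source_info["new_phrase"] = False
```
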
```diff
@@ -96,18 +103,23 @@ def update_transcript(self, who_spoke, text, time_spoken):
             transcript[0] = (f"{who_spoke}: [{text}]\n\n", time_spoken)
 
     def get_transcript(self):
-        combined_transcript = list(merge(
-            self.transcript_data["You"], self.transcript_data["Speaker"],
-            key=lambda x: x[1], reverse=True))
+        combined_transcript = list(
+            merge(
+                self.transcript_data["You"],
+                self.transcript_data["Speaker"],
+                key=lambda x: x[1],
+                reverse=True,
+            )
+        )
         combined_transcript = combined_transcript[:MAX_PHRASES]
         return "".join([t[0] for t in combined_transcript])
 
     def clear_transcript_data(self):
         self.transcript_data["You"].clear()
         self.transcript_data["Speaker"].clear()
 
-        self.audio_sources["You"]["last_sample"] = bytes()
-        self.audio_sources["Speaker"]["last_sample"] = bytes()
+        self.audio_sources["You"]["last_sample"] = b""
+        self.audio_sources["Speaker"]["last_sample"] = b""
 
         self.audio_sources["You"]["new_phrase"] = True
-        self.audio_sources["Speaker"]["new_phrase"] = True
+        self.audio_sources["Speaker"]["new_phrase"] = True
```

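For readers skimming `get_transcript`: `heapq.merge` does not sort its inputs; it lazily interleaves lists that are already sorted in the same order (here descending by timestamp, hence `reverse=True`, since `update_transcript` inserts the newest phrase at index 0). A small standalone illustration with made-up phrases:

```python
from heapq import merge

# Both inputs are already sorted newest-first (descending timestamps).
you = [("You: [see you]\n\n", 3), ("You: [hi]\n\n", 1)]
speaker = [("Speaker: [hello]\n\n", 2)]

combined = list(merge(you, speaker, key=lambda x: x[1], reverse=True))
# Timestamps come out as 3, 2, 1: the newest phrase across both sources first.
```
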
38 changes: 22 additions & 16 deletions GPTResponder.py

```diff
@@ -1,26 +1,30 @@
+import time
+
 import openai
 from keys import OPENAI_API_KEY
-from prompts import create_prompt, INITIAL_RESPONSE
-import time
+from prompts import INITIAL_RESPONSE, create_prompt
 
 openai.api_key = OPENAI_API_KEY
 
+
 def generate_response_from_transcript(transcript):
     try:
         response = openai.ChatCompletion.create(
-                model="gpt-3.5-turbo-0301",
-                messages=[{"role": "system", "content": create_prompt(transcript)}],
-                temperature = 0.0
+            model="gpt-3.5-turbo-0301",
+            messages=[{"role": "system", "content": create_prompt(transcript)}],
+            temperature=0.0,
         )
     except Exception as e:
         print(e)
-        return ''
+        return ""
     full_response = response.choices[0].message.content
     try:
-        return full_response.split('[')[1].split(']')[0]
-    except:
-        return ''
+        return full_response.split("[")[1].split("]")[0]
+    except IndexError:
+        return ""
+
 
 class GPTResponder:
     def __init__(self):
         self.response = INITIAL_RESPONSE
```

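One fix above is worth calling out: the bare `except:` becomes `except IndexError:` (a bare except would also swallow `SystemExit` and `KeyboardInterrupt`), while the bracket-parsing behavior is unchanged. A standalone sketch of that behavior, using a hypothetical helper name:

```python
def extract_bracketed(full_response: str) -> str:
    """Return the first [bracketed] span, or "" when no "[" is present."""
    try:
        return full_response.split("[")[1].split("]")[0]
    except IndexError:  # raised when the response contains no "["
        return ""

assert extract_bracketed("Sure. [Sounds good to me]") == "Sounds good to me"
assert extract_bracketed("no brackets here") == ""
```
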
```diff
@@ -31,14 +35,16 @@ def respond_to_transcriber(self, transcriber):
             if transcriber.transcript_changed_event.is_set():
                 start_time = time.time()
 
-                transcriber.transcript_changed_event.clear() 
+                transcriber.transcript_changed_event.clear()
                 transcript_string = transcriber.get_transcript()
                 response = generate_response_from_transcript(transcript_string)
 
                 end_time = time.time()  # Measure end time
-                execution_time = end_time - start_time  # Calculate the time it took to execute the function
-
-                if response != '':
+
+                # Calculate the time it took to execute the function
+                execution_time = end_time - start_time
+
+                if response != "":
                     self.response = response
 
                 remaining_time = self.response_interval - execution_time
@@ -48,4 +54,4 @@
             time.sleep(0.3)
 
     def update_response_interval(self, interval):
-        self.response_interval = interval
+        self.response_interval = interval
```

21 changes: 21 additions & 0 deletions Makefile

```makefile
lint:
	@echo
	ruff .
	@echo
	black --check --diff --color .
	@echo
	pip-audit

format:
	ruff --silent --exit-zero --fix .
	black .

precommit:
	make lint
```

**Owner:** Should probably have instructions to install `make` as well; Chocolatey seems easiest.

**Contributor (author):** I believe it's built in on macOS and Linux, not sure about Windows.

**Owner:** On Windows you have to install it. Installing Chocolatey with the command in the README and using `choco install make` would be good enough for instructions.

```makefile
	make format

venv:
	python3 -m venv ecout_env
```

**Owner:** python or python3?

**Contributor (author):** I think python3 is preferred since we are working with Python 3.x.x.

**Owner:** Could we somehow use either? For me, I don't have a python3 alias, only python in my PATH.

**Contributor (author):** Yeah, I can change it to python instead of python3.

**Contributor (author):** Done.

```makefile
install:
	pip install -r requirements.txt
```

**Contributor (author):** These are the streamlined installation changes; let me know if it is necessary to split the PR.

26 changes: 22 additions & 4 deletions README.md

````diff
@@ -5,7 +5,7 @@ Ecoute is a live transcription tool that provides real-time transcripts for both
 
 ## 📖 Demo
 
-https://github.com/SevaSk/ecoute/assets/50382291/8ac48927-8a26-49fd-80e9-48f980986208
+<https://github.com/SevaSk/ecoute/assets/50382291/8ac48927-8a26-49fd-80e9-48f980986208>
 
 Ecoute is designed to help users in their conversations by providing live transcriptions and generating contextually relevant responses. By leveraging the power of OpenAI's GPT-3.5, Ecoute aims to make communication more efficient and enjoyable.
 
@@ -18,18 +18,22 @@ Follow these steps to set up and run Ecoute on your local machine.
 - Python >=3.8.0
 - An OpenAI API key
 - Windows OS (Not tested on others)
-- FFmpeg 
+- FFmpeg
 
 If FFmpeg is not installed in your system, you can follow the steps below to install it.
 
 First, you need to install Chocolatey, a package manager for Windows. Open your PowerShell as Administrator and run the following command:
+
 ```
 Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
 ```
+
 Once Chocolatey is installed, you can install FFmpeg by running the following command in your PowerShell:
+
 ```
 choco install ffmpeg-full
 ```
+
 Please ensure that you run these commands in a PowerShell window with administrator privileges. If you face any issues during the installation, you can visit the official Chocolatey and FFmpeg websites for troubleshooting.
@@ -51,7 +55,7 @@ Please ensure that you run these commands in a PowerShell window with administrator privileges.
    ```
    pip install -r requirements.txt
    ```
-   
+
 4. Create a `keys.py` file in the ecoute directory and add your OpenAI API key:
 
    - Option 1: You can utilize a command on your command prompt. Run the following command, ensuring to replace "API KEY" with your actual OpenAI API key:
@@ -61,10 +65,11 @@ Please ensure that you run these commands in a PowerShell window with administrator privileges.
      ```
 
    - Option 2: You can create the keys.py file manually. Open up your text editor of choice and enter the following content:
+
      ```
     OPENAI_API_KEY="API KEY"
      ```
 
      Replace "API KEY" with your actual OpenAI API key. Save this file as keys.py within the ecoute directory.
````

```diff
@@ -102,3 +107,16 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 ## 🤝 Contributing
 
 Contributions are welcome! Feel free to open issues or submit pull requests to improve Ecoute.
+
+### Installation
+
+1. `make venv`
+2. Activate the venv: `ecout_venv`
```

**Owner:** `ecout_env\Scripts\activate`?

**Contributor (author):** I see, I'll add one script for macOS and one for Windows.

**Contributor (author):** Ah, sorry. I tried adding a script, but it's not possible to activate the venv with a make command. I'll add both options to the README; for macOS it's `source ecout_env/bin/activate`.
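Collecting the two activation commands quoted in this thread into one place (both paths assume the `ecout_env` virtual environment that `make venv` creates):

```
# Windows
ecout_env\Scripts\activate

# macOS / Linux
source ecout_env/bin/activate
```
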

```diff
+3. `make install`
+
+### Code quality
+
+Before submitting a pull request run `make precommit` and resolve any issues. Additionally, here are some useful commands:
+
+- `make lint`
+- `make format`
```

**Contributor (author):** These are the streamlined installation changes; let me know if it is necessary to split the PR.

**Owner:** Oh, sorry! Got confused with the Installation section above.