Commit

mvp
sshh12 committed Mar 15, 2023
1 parent f2f29ae commit d08687b
Showing 8 changed files with 170 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -127,3 +127,9 @@ dmypy.json

# Pyre type checker
.pyre/

ffcache*
env.bat
*.mp3
*.wav
test.py
7 changes: 7 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,7 @@
{
    "editor.formatOnSave": true,
    "python.formatting.provider": "black",
    "python.formatting.blackArgs": ["--line-length", "120"],
    "python.linting.pylintUseMinimalCheckers": false,
    "editor.showUnused": true
}
42 changes: 42 additions & 0 deletions convo/actors.py
@@ -0,0 +1,42 @@
from typing import List
from abc import ABC, abstractmethod

from convo.audio_input import WhisperMicrophone
from convo.audio_output import TTSSpeaker
from convo.openai_io import OpenAIChatCompletion


class ChatAgent(ABC):
    @abstractmethod
    def get_response(self, transcript: List[str]) -> str:
        pass


class MicrophoneInSpeakerTTSOut(ChatAgent):
    def __init__(self):
        self.mic = WhisperMicrophone()
        self.speaker = TTSSpeaker()

    def get_response(self, transcript: List[str]) -> str:
        if len(transcript) > 0:
            self.speaker.play(transcript[-1])
        return self.mic.get_transcription()


class TerminalInPrintOut(ChatAgent):
    def get_response(self, transcript: List[str]) -> str:
        if len(transcript) > 0:
            print(transcript[-1])
        return input(" response > ")


class OpenAIChat(ChatAgent):
    def __init__(self):
        self.openai_chat = OpenAIChatCompletion()

    def get_response(self, transcript: List[str]) -> str:
        if len(transcript) > 0:
            response = self.openai_chat.get_response(transcript)
        else:
            response = "Generic"
        return response
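
This commit does not add an entry point that wires these agents together. A minimal conversation loop, assuming the two agents simply take alternating turns on a shared transcript (run_convo and its turn limit are hypothetical, not part of this commit), could look like:

# Hypothetical driver script, not included in this commit.
from convo.actors import MicrophoneInSpeakerTTSOut, OpenAIChat


def run_convo(max_turns: int = 10):
    human = MicrophoneInSpeakerTTSOut()  # speaks the last reply aloud, then records the user
    bot = OpenAIChat()  # sends the running transcript to gpt-3.5-turbo
    transcript = []
    for _ in range(max_turns):
        transcript.append(human.get_response(transcript))
        transcript.append(bot.get_response(transcript))


if __name__ == "__main__":
    run_convo()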
29 changes: 29 additions & 0 deletions convo/audio_input.py
@@ -0,0 +1,29 @@
import io
import os
import tempfile

from pydub import AudioSegment
import speech_recognition as sr
import whisper


class WhisperMicrophone:
    def __init__(self):
        self.audio_model = whisper.load_model("tiny")
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = 500
        self.recognizer.pause_threshold = 0.8
        self.recognizer.dynamic_energy_threshold = False

    def get_transcription(self) -> str:
        with sr.Microphone(sample_rate=16000) as source:
            print("Waiting for mic...")
            with tempfile.TemporaryDirectory() as tmp:
                tmp_path = os.path.join(tmp, "mic.wav")
                audio = self.recognizer.listen(source)
                data = io.BytesIO(audio.get_wav_data())
                audio_clip = AudioSegment.from_file(data)
                audio_clip.export(tmp_path, format="wav")
                result = self.audio_model.transcribe(tmp_path, language="english")
                predicted_text = result["text"]
        return predicted_text
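
A quick way to try this class on its own, assuming the listed requirements are installed and a microphone is available (this snippet is illustrative, not part of the commit):

# Illustrative smoke test, not part of this commit.
from convo.audio_input import WhisperMicrophone

mic = WhisperMicrophone()
print("You said:", mic.get_transcription())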
37 changes: 37 additions & 0 deletions convo/audio_output.py
@@ -0,0 +1,37 @@
import os
import tempfile
import subprocess

from gtts import gTTS
import pyaudio
import wave


class TTSSpeaker:
    def __init__(self):
        self.chunk_size = 1024

    def play(self, text: str):
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmp:
            tmp_mp3 = os.path.join(tmp, "tts.mp3")
            tmp_wav = os.path.join(tmp, "tts.wav")
            tts = gTTS(text, lang="en")
            tts.save(tmp_mp3)
            subprocess.call(["ffmpeg", "-y", "-i", tmp_mp3, tmp_wav])

            wf = wave.open(tmp_wav, "rb")
            audio = pyaudio.PyAudio()
            stream = audio.open(
                format=audio.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
            )

            data = wf.readframes(self.chunk_size)
            while data != b"":
                stream.write(data)
                data = wf.readframes(self.chunk_size)

            stream.close()
            audio.terminate()
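
Note that play shells out to the ffmpeg binary, which is not listed in requirements.txt, so it must be installed separately and available on PATH. An optional preflight check (illustrative, not part of this commit):

# Optional preflight check, not part of this commit: fail fast if ffmpeg is missing.
import shutil

if shutil.which("ffmpeg") is None:
    raise RuntimeError("TTSSpeaker.play converts mp3 to wav with ffmpeg; install it and add it to PATH")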
20 changes: 20 additions & 0 deletions convo/openai_io.py
@@ -0,0 +1,20 @@
from typing import List
import os
import openai


class OpenAIChatCompletion:
    def __init__(self):
        openai.api_key = os.environ["OPENAI_KEY"]

    def get_response(self, transcript: List[str]) -> str:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
        ]
        for i, text in enumerate(reversed(transcript)):
            messages.insert(1, {"role": "user" if i % 2 == 0 else "assistant", "content": text})
        output = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        return output["choices"][0]["message"]["content"]
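
The insert-at-index-1 loop over reversed(transcript) rebuilds the conversation in chronological order, assuming the transcript strictly alternates speakers and ends with the user's latest utterance. For example:

# Illustration only: how a three-turn transcript maps onto chat messages.
transcript = ["Hi there", "Hello! How can I help?", "Tell me a joke"]
# After the loop above, messages is:
# [
#     {"role": "system", "content": "You are a helpful assistant."},
#     {"role": "user", "content": "Hi there"},
#     {"role": "assistant", "content": "Hello! How can I help?"},
#     {"role": "user", "content": "Tell me a joke"},
# ]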
12 changes: 12 additions & 0 deletions requirements.txt
@@ -0,0 +1,12 @@
black==23.1.0

gTTS
openai

git+https://github.com/openai/whisper.git
SpeechRecognition
pyaudio
pydub
--extra-index-url https://download.pytorch.org/whl/cu116
torch
torchaudio
17 changes: 17 additions & 0 deletions setup.py
@@ -0,0 +1,17 @@
from setuptools import setup

with open("requirements.txt") as f:
    required = f.read().splitlines()


setup(
    name="convo",
    version="0.0.0",
    description="",
    url="https://github.com/sshh12/csgo-match-prediction",
    author="Shrivu Shankar",
    license="MIT",
    packages=["convo"],
    include_package_data=True,
    install_requires=required,
)
