Commit

mvp
sshh12 committed Mar 15, 2023
1 parent f2f29ae commit d08687b
Showing 8 changed files with 170 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -127,3 +127,9 @@ dmypy.json

# Pyre type checker
.pyre/

ffcache*
env.bat
*.mp3
*.wav
test.py
7 changes: 7 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,7 @@
{
    "editor.formatOnSave": true,
    "python.formatting.provider": "black",
    "python.formatting.blackArgs": ["--line-length", "120"],
    "python.linting.pylintUseMinimalCheckers": false,
    "editor.showUnused": true
}
42 changes: 42 additions & 0 deletions convo/actors.py
@@ -0,0 +1,42 @@
from typing import List
from abc import ABC, abstractmethod

from convo.audio_input import WhisperMicrophone
from convo.audio_output import TTSSpeaker
from convo.openai_io import OpenAIChatCompletion


class ChatAgent(ABC):
    @abstractmethod
    def get_response(self, transcript: List[str]) -> str:
        pass


class MicrophoneInSpeakerTTSOut(ChatAgent):
    def __init__(self):
        self.mic = WhisperMicrophone()
        self.speaker = TTSSpeaker()

    def get_response(self, transcript: List[str]) -> str:
        if len(transcript) > 0:
            self.speaker.play(transcript[-1])
        return self.mic.get_transcription()


class TerminalInPrintOut(ChatAgent):
    def get_response(self, transcript: List[str]) -> str:
        if len(transcript) > 0:
            print(transcript[-1])
        return input(" response > ")


class OpenAIChat(ChatAgent):
    def __init__(self):
        self.openai_chat = OpenAIChatCompletion()

    def get_response(self, transcript: List[str]) -> str:
        if len(transcript) > 0:
            response = self.openai_chat.get_response(transcript)
        else:
            response = "Generic"
        return response
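
This commit does not add an entry point that wires these agents together. A minimal conversation loop, assuming the two agents simply take alternating turns on a shared transcript (run_convo and its turn limit are hypothetical, not part of this commit), could look like:

# Hypothetical driver script, not included in this commit.
from convo.actors import MicrophoneInSpeakerTTSOut, OpenAIChat


def run_convo(max_turns: int = 10):
    human = MicrophoneInSpeakerTTSOut()  # speaks the last reply aloud, then records the user
    bot = OpenAIChat()  # sends the running transcript to gpt-3.5-turbo
    transcript = []
    for _ in range(max_turns):
        transcript.append(human.get_response(transcript))
        transcript.append(bot.get_response(transcript))


if __name__ == "__main__":
    run_convo()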
29 changes: 29 additions & 0 deletions convo/audio_input.py
@@ -0,0 +1,29 @@
import io
import os
import tempfile

from pydub import AudioSegment
import speech_recognition as sr
import whisper


class WhisperMicrophone:
    def __init__(self):
        self.audio_model = whisper.load_model("tiny")
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = 500
        self.recognizer.pause_threshold = 0.8
        self.recognizer.dynamic_energy_threshold = False

    def get_transcription(self) -> str:
        with sr.Microphone(sample_rate=16000) as source:
            print("Waiting for mic...")
            with tempfile.TemporaryDirectory() as tmp:
                tmp_path = os.path.join(tmp, "mic.wav")
                audio = self.recognizer.listen(source)
                data = io.BytesIO(audio.get_wav_data())
                audio_clip = AudioSegment.from_file(data)
                audio_clip.export(tmp_path, format="wav")
                result = self.audio_model.transcribe(tmp_path, language="english")
                predicted_text = result["text"]
        return predicted_text
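
A quick way to try this class on its own, assuming the listed requirements are installed and a microphone is available (this snippet is illustrative, not part of the commit):

# Illustrative smoke test, not part of this commit.
from convo.audio_input import WhisperMicrophone

mic = WhisperMicrophone()
print("You said:", mic.get_transcription())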
37 changes: 37 additions & 0 deletions convo/audio_output.py
@@ -0,0 +1,37 @@
import os
import tempfile
import subprocess

from gtts import gTTS
import pyaudio
import wave


class TTSSpeaker:
    def __init__(self):
        self.chunk_size = 1024

    def play(self, text: str):
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmp:
            tmp_mp3 = os.path.join(tmp, "tts.mp3")
            tmp_wav = os.path.join(tmp, "tts.wav")
            tts = gTTS(text, lang="en")
            tts.save(tmp_mp3)
            subprocess.call(["ffmpeg", "-y", "-i", tmp_mp3, tmp_wav])

            wf = wave.open(tmp_wav, "rb")
            audio = pyaudio.PyAudio()
            stream = audio.open(
                format=audio.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
            )

            data = wf.readframes(self.chunk_size)
            while data != b"":
                stream.write(data)
                data = wf.readframes(self.chunk_size)

            stream.close()
            audio.terminate()
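
Note that play shells out to the ffmpeg binary, which is not listed in requirements.txt, so it must be installed separately and available on PATH. An optional preflight check (illustrative, not part of this commit):

# Optional preflight check, not part of this commit: fail fast if ffmpeg is missing.
import shutil

if shutil.which("ffmpeg") is None:
    raise RuntimeError("TTSSpeaker.play converts mp3 to wav with ffmpeg; install it and add it to PATH")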
20 changes: 20 additions & 0 deletions convo/openai_io.py
@@ -0,0 +1,20 @@
from typing import List
import os
import openai


class OpenAIChatCompletion:
    def __init__(self):
        openai.api_key = os.environ["OPENAI_KEY"]

    def get_response(self, transcript: List[str]) -> str:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
        ]
        for i, text in enumerate(reversed(transcript)):
            messages.insert(1, {"role": "user" if i % 2 == 0 else "assistant", "content": text})
        output = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        return output["choices"][0]["message"]["content"]
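
The insert-at-index-1 loop over reversed(transcript) rebuilds the conversation in chronological order, assuming the transcript strictly alternates speakers and ends with the user's latest utterance. For example:

# Illustration only: how a three-turn transcript maps onto chat messages.
transcript = ["Hi there", "Hello! How can I help?", "Tell me a joke"]
# After the loop above, messages is:
# [
#     {"role": "system", "content": "You are a helpful assistant."},
#     {"role": "user", "content": "Hi there"},
#     {"role": "assistant", "content": "Hello! How can I help?"},
#     {"role": "user", "content": "Tell me a joke"},
# ]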
12 changes: 12 additions & 0 deletions requirements.txt
@@ -0,0 +1,12 @@
black==23.1.0

gTTS
openai

git+https://github.com/openai/whisper.git
SpeechRecognition
pyaudio
pydub
--extra-index-url https://download.pytorch.org/whl/cu116
torch
torchaudio
17 changes: 17 additions & 0 deletions setup.py
@@ -0,0 +1,17 @@
from setuptools import setup

with open("requirements.txt") as f:
    required = f.read().splitlines()


setup(
    name="convo",
    version="0.0.0",
    description="",
    url="https://github.com/sshh12/csgo-match-prediction",
    author="Shrivu Shankar",
    license="MIT",
    packages=["convo"],
    include_package_data=True,
    install_requires=required,
)
