From 68dcce4b7e9cf406666707bc8fea9ae8b17101f6 Mon Sep 17 00:00:00 2001 From: Shrivu Shankar Date: Sun, 17 Sep 2023 09:41:45 -0700 Subject: [PATCH] update --- .vscode/settings.json | 7 ++- README.md | 22 ++++++- examples/keyboard_chat_with_gpt.py | 27 +++++++++ examples/twilio_ngrok_ml_rhyme_hotline.py | 54 +++++++++++++++++ examples/twilio_ngrok_pizza_order.py | 72 +++++++++++++++++++++++ llm_convo/agents.py | 12 ++-- llm_convo/conversation.py | 2 +- llm_convo/openai_io.py | 9 +-- llm_convo/twilio_io.py | 2 +- requirements.txt | 23 ++++---- setup.py | 4 +- 11 files changed, 205 insertions(+), 29 deletions(-) create mode 100644 examples/keyboard_chat_with_gpt.py create mode 100644 examples/twilio_ngrok_ml_rhyme_hotline.py create mode 100644 examples/twilio_ngrok_pizza_order.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 148d41c..dd48873 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,7 +1,10 @@ { "editor.formatOnSave": true, - "python.formatting.provider": "black", + "python.formatting.provider": "none", "python.formatting.blackArgs": ["--line-length", "120"], "python.linting.pylintUseMinimalCheckers": false, - "editor.showUnused": true + "editor.showUnused": true, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + } } diff --git a/README.md b/README.md index eea6d9f..359737f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Twilio Webhook -> Flask app -> Twilio Media Stream (websocket) -> Whisper -> Cha 2. Environment Variables ``` -OPENAI_KEY= +OPENAI_API_KEY= TWILIO_ACCOUNT_SID= TWILIO_AUTH_TOKEN= TWILIO_PHONE_NUMBER= @@ -20,6 +20,26 @@ TWILIO_PHONE_NUMBER= ### Demo +#### Basic Text Chat + +Try `python examples\keyboard_chat_with_gpt.py` to chat with GPT through terminal. + +#### Twilio Helpline + +Try `python examples\twilio_ngrok_ml_rhyme_hotline.py --preload_whisper --start_ngrok`. This requires whisper installed locally. 
+ +This will create an ngrok tunnel and provide a webhook URL to point to in Twilio settings for a purchased phone number. + +chrome_VZSfJHN6FV + +#### Twilio Pizza Order + +Try `python examples\twilio_ngrok_pizza_order.py --preload_whisper --start_ngrok --phone_number "+1.........."`. This requires whisper installed locally. + +This will create an ngrok tunnel and provide a webhook URL to point to in Twilio settings for a purchased phone number. Once the webhook is updated, it will start an outgoing call to the provided phone number. + +#### Code Snippets + Setup a Haiku hotline with Twilio that can be called like any other phone number. ```python diff --git a/examples/keyboard_chat_with_gpt.py b/examples/keyboard_chat_with_gpt.py new file mode 100644 index 0000000..08ff22a --- /dev/null +++ b/examples/keyboard_chat_with_gpt.py @@ -0,0 +1,27 @@ +from gevent import monkey + +monkey.patch_all() + +import logging +import argparse +import tempfile +import os +from llm_convo.agents import OpenAIChat, TerminalInPrintOut +from llm_convo.conversation import run_conversation + + +def main(model): + agent_a = OpenAIChat( + system_prompt="You are a machine learning assistant. Answer the users questions about machine learning with short rhymes. Ask follow up questions when needed to help clarify their question.", + init_phrase="Hello! 
Welcome to the Machine Learning hotline, how can I help?", + model=model, + ) + agent_b = TerminalInPrintOut() + run_conversation(agent_a, agent_b) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, default="gpt-3.5-turbo") + args = parser.parse_args() + main(args.model) diff --git a/examples/twilio_ngrok_ml_rhyme_hotline.py b/examples/twilio_ngrok_ml_rhyme_hotline.py new file mode 100644 index 0000000..e0f2111 --- /dev/null +++ b/examples/twilio_ngrok_ml_rhyme_hotline.py @@ -0,0 +1,54 @@ +from gevent import monkey + +monkey.patch_all() + +import logging +import argparse +import tempfile +import os +import time +from llm_convo.agents import OpenAIChat, TwilioCaller +from llm_convo.audio_input import get_whisper_model +from llm_convo.twilio_io import TwilioServer +from llm_convo.conversation import run_conversation +from pyngrok import ngrok + + +def main(port, remote_host, start_ngrok): + if start_ngrok: + ngrok_http = ngrok.connect(port) + remote_host = ngrok_http.public_url.split("//")[1] + + static_dir = os.path.join(tempfile.gettempdir(), "twilio_static") + os.makedirs(static_dir, exist_ok=True) + + logging.info(f"Starting server at {remote_host} from local:{port}, serving static content from {static_dir}") + logging.info(f"Set call webhook to https://{remote_host}/incoming-voice") + + tws = TwilioServer(remote_host=remote_host, port=port, static_dir=static_dir) + tws.start() + agent_a = OpenAIChat( + system_prompt="You are a machine learning assistant. Answer the users questions about machine learning with short rhymes. Ask follow up questions when needed to help clarify their question.", + init_phrase="Hello! 
Welcome to the Machine Learning hotline, how can I help?", + ) + + def run_chat(sess): + agent_b = TwilioCaller(sess, thinking_phrase="One moment.") + while not agent_b.session.media_stream_connected(): + time.sleep(0.1) + run_conversation(agent_a, agent_b) + + tws.on_session = run_chat + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + parser = argparse.ArgumentParser() + parser.add_argument("--preload_whisper", action="store_true") + parser.add_argument("--start_ngrok", action="store_true") + parser.add_argument("--port", type=int, default=8080) + parser.add_argument("--remote_host", type=str, default="localhost") + args = parser.parse_args() + if args.preload_whisper: + get_whisper_model() + main(args.port, args.remote_host, args.start_ngrok) diff --git a/examples/twilio_ngrok_pizza_order.py b/examples/twilio_ngrok_pizza_order.py new file mode 100644 index 0000000..0c6ef4a --- /dev/null +++ b/examples/twilio_ngrok_pizza_order.py @@ -0,0 +1,72 @@ +from gevent import monkey + +monkey.patch_all() + +import logging +import argparse +import tempfile +import os +import time +import sys +from llm_convo.agents import OpenAIChat, TwilioCaller +from llm_convo.audio_input import get_whisper_model +from llm_convo.twilio_io import TwilioServer +from llm_convo.conversation import run_conversation +from pyngrok import ngrok + + +def main(port, remote_host, start_ngrok, phone_number): + if start_ngrok: + ngrok_http = ngrok.connect(port) + remote_host = ngrok_http.public_url.split("//")[1] + + static_dir = os.path.join(tempfile.gettempdir(), "twilio_static") + os.makedirs(static_dir, exist_ok=True) + + logging.info( + f"Starting server at {remote_host} from local:{port}, serving static content from {static_dir}, will call {phone_number}" + ) + logging.info(f"Set call webhook to https://{remote_host}/incoming-voice") + + input(" >>> Press enter to start the call after ensuring the webhook is set. 
<<< ") + + tws = TwilioServer(remote_host=remote_host, port=port, static_dir=static_dir) + tws.start() + agent_a = OpenAIChat( + system_prompt=""" + You are an ordering bot that is going to call a pizza place and order a pizza. + When you need to say numbers space them out (e.g. 1 2 3) and do not respond with abbreviations. + If they ask for information not known, make something up that's reasonable. + + The customer's details are: + * Address: 1234 Candyland Road, Apt 506 + * Credit Card: 1234 5555 8888 9999 (CVV: 010) + * Name: Bob Joe + * Order: 1 large pizza with only pepperoni + """, + init_phrase="Hi, I would like to order a pizza.", + ) + + def run_chat(sess): + agent_b = TwilioCaller(sess, thinking_phrase="One moment.") + while not agent_b.session.media_stream_connected(): + time.sleep(0.1) + run_conversation(agent_a, agent_b) + sys.exit(0) + + tws.on_session = run_chat + tws.start_call(phone_number) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + parser = argparse.ArgumentParser() + parser.add_argument("--phone_number", type=str) + parser.add_argument("--preload_whisper", action="store_true") + parser.add_argument("--start_ngrok", action="store_true") + parser.add_argument("--port", type=int, default=8080) + parser.add_argument("--remote_host", type=str, default="localhost") + args = parser.parse_args() + if args.preload_whisper: + get_whisper_model() + main(args.port, args.remote_host, args.start_ngrok, args.phone_number) diff --git a/llm_convo/agents.py b/llm_convo/agents.py index 143d863..81e9a86 100644 --- a/llm_convo/agents.py +++ b/llm_convo/agents.py @@ -1,10 +1,10 @@ from typing import List, Optional from abc import ABC, abstractmethod -from convo.audio_input import WhisperMicrophone -from convo.audio_output import TTSClient, GoogleTTS -from convo.openai_io import OpenAIChatCompletion -from convo.twilio_io import TwilioCallSession +from llm_convo.audio_input import WhisperMicrophone +from llm_convo.audio_output import 
TTSClient, GoogleTTS +from llm_convo.openai_io import OpenAIChatCompletion +from llm_convo.twilio_io import TwilioCallSession class ChatAgent(ABC): @@ -35,8 +35,8 @@ def get_response(self, transcript: List[str]) -> str: class OpenAIChat(ChatAgent): - def __init__(self, system_prompt: str, init_phrase: Optional[str] = None): - self.openai_chat = OpenAIChatCompletion(system_prompt=system_prompt) + def __init__(self, system_prompt: str, init_phrase: Optional[str] = None, model: Optional[str] = None): + self.openai_chat = OpenAIChatCompletion(system_prompt=system_prompt, model=model) self.init_phrase = init_phrase def get_response(self, transcript: List[str]) -> str: diff --git a/llm_convo/conversation.py b/llm_convo/conversation.py index 92d15b2..32e5397 100644 --- a/llm_convo/conversation.py +++ b/llm_convo/conversation.py @@ -1,4 +1,4 @@ -from convo.agents import ChatAgent +from llm_convo.agents import ChatAgent def run_conversation(agent_a: ChatAgent, agent_b: ChatAgent): diff --git a/llm_convo/openai_io.py b/llm_convo/openai_io.py index 881ea97..9168063 100644 --- a/llm_convo/openai_io.py +++ b/llm_convo/openai_io.py @@ -1,13 +1,14 @@ -from typing import List +from typing import List, Optional import os import openai -openai.api_key = os.environ["OPENAI_KEY"] +openai.api_key = os.environ["OPENAI_API_KEY"] class OpenAIChatCompletion: - def __init__(self, system_prompt: str): + def __init__(self, system_prompt: str, model: Optional[str] = None): self.system_prompt = system_prompt + self.model = model def get_response(self, transcript: List[str]) -> str: messages = [ @@ -16,7 +17,7 @@ def get_response(self, transcript: List[str]) -> str: for i, text in enumerate(reversed(transcript)): messages.insert(1, {"role": "user" if i % 2 == 0 else "assistant", "content": text}) output = openai.ChatCompletion.create( - model="gpt-3.5-turbo", + model="gpt-3.5-turbo" if self.model is None else self.model, messages=messages, ) return output["choices"][0]["message"]["content"] diff 
--git a/llm_convo/twilio_io.py b/llm_convo/twilio_io.py index c9d19a2..a53657e 100644 --- a/llm_convo/twilio_io.py +++ b/llm_convo/twilio_io.py @@ -12,7 +12,7 @@ import simple_websocket import audioop -from convo.audio_input import WhisperTwilioStream +from llm_convo.audio_input import WhisperTwilioStream XML_MEDIA_STREAM = """ diff --git a/requirements.txt b/requirements.txt index 808ae1f..9c8b872 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,11 @@ -black==23.1.0 - -gTTS -openai - -git+https://github.com/openai/whisper.git -SpeechRecognition -pyaudio -pydub ---extra-index-url https://download.pytorch.org/whl/cu116 -torch -torchaudio \ No newline at end of file +black~=23.1 +gTTS~=2.3 +openai~=0.28 +SpeechRecognition~=3.10 +pyaudio~=0.2 +pydub~=0.25 +pyngrok~=6.1.0 +gevent~=23.9.1 +twilio~=8.8.0 +flask~=2.3 +flask_sock~=0.6 diff --git a/setup.py b/setup.py index 92fde21..34790fa 100644 --- a/setup.py +++ b/setup.py @@ -5,10 +5,10 @@ setup( - name="llm_convo", + name="llm_convo", version="0.0.0", description="", - url="https://github.com/sshh12/llm_convo", + url="https://github.com/sshh12/llm_convo", author="Shrivu Shankar", license="MIT", packages=find_packages(),