Commit

update
sshh12 committed Sep 17, 2023
1 parent 3a6d656 commit 68dcce4
Showing 11 changed files with 205 additions and 29 deletions.
7 changes: 5 additions & 2 deletions .vscode/settings.json
@@ -1,7 +1,10 @@
{
"editor.formatOnSave": true,
"python.formatting.provider": "black",
"python.formatting.provider": "none",
"python.formatting.blackArgs": ["--line-length", "120"],
"python.linting.pylintUseMinimalCheckers": false,
"editor.showUnused": true
"editor.showUnused": true,
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
}
}
22 changes: 21 additions & 1 deletion README.md
@@ -12,14 +12,34 @@ Twilio Webhook -> Flask app -> Twilio Media Stream (websocket) -> Whisper -> Cha
2. Environment Variables

```
OPENAI_KEY=
OPENAI_API_KEY=
TWILIO_ACCOUNT_SID=
TWILIO_AUTH_TOKEN=
TWILIO_PHONE_NUMBER=
```
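As a quick sanity check before running the examples, here is a minimal sketch (illustrative only, not part of the repo) that verifies these variables are set:

```python
import os
import sys

# Illustrative check: confirm the environment variables listed above are set
# before launching any of the example scripts.
REQUIRED_VARS = [
    "OPENAI_API_KEY",
    "TWILIO_ACCOUNT_SID",
    "TWILIO_AUTH_TOKEN",
    "TWILIO_PHONE_NUMBER",
]

missing = [name for name in REQUIRED_VARS if not os.environ.get(name)]
if missing:
    sys.exit(f"Missing environment variables: {', '.join(missing)}")
```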

### Demo

#### Basic Text Chat

Try `python examples\keyboard_chat_with_gpt.py` to chat with GPT through the terminal.

#### Twilio Helpline

Try `python examples\twilio_ngrok_ml_rhyme_hotline.py --preload_whisper --start_ngrok`. This requires Whisper to be installed locally.

This will create an ngrok tunnel and print the webhook URL to set for a purchased phone number in your Twilio settings.

<img width="1169" alt="chrome_VZSfJHN6FV" src="https://github.com/sshh12/llm_convo/assets/6625384/1fe9468d-0eb3-4309-9b81-1d2f3d02c353">
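If you prefer to set the webhook programmatically rather than through the Twilio console, a rough sketch using the Twilio Python SDK (the ngrok host below is a placeholder):

```python
import os
from twilio.rest import Client

# Sketch: point a purchased Twilio number's voice webhook at the URL printed
# by the example script. The ngrok host below is a placeholder.
client = Client(os.environ["TWILIO_ACCOUNT_SID"], os.environ["TWILIO_AUTH_TOKEN"])
numbers = client.incoming_phone_numbers.list(phone_number=os.environ["TWILIO_PHONE_NUMBER"])
numbers[0].update(voice_url="https://<your-ngrok-host>/incoming-voice")
```

This has the same effect as editing the number's voice webhook in the console shown above.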

#### Twilio Pizza Order

Try `python examples\twilio_ngrok_pizza_order.py --preload_whisper --start_ngrok --phone_number "+1.........."`. This requires Whisper to be installed locally.

This will create an ngrok tunnel and print the webhook URL to set for a purchased phone number in your Twilio settings. Once the webhook is updated, the script will start an outgoing call to the provided phone number.

#### Code Snippets

Set up a Haiku hotline with Twilio that can be called like any other phone number.

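The repo's snippet is collapsed in this diff view; below is a rough sketch of what such a hotline can look like, pieced together from the example scripts added in this commit (the ngrok host, port, and prompt are illustrative, not the repo's actual snippet):

```python
from gevent import monkey

monkey.patch_all()

import os
import tempfile
import time

from llm_convo.agents import OpenAIChat, TwilioCaller
from llm_convo.twilio_io import TwilioServer
from llm_convo.conversation import run_conversation

# Serve static content for the call from a temp directory, as the examples do.
static_dir = os.path.join(tempfile.gettempdir(), "twilio_static")
os.makedirs(static_dir, exist_ok=True)

# remote_host would normally come from ngrok or your own domain (placeholder here).
tws = TwilioServer(remote_host="abcdef.ngrok.app", port=8080, static_dir=static_dir)
tws.start()

agent_a = OpenAIChat(
    system_prompt="You are a poet. Answer every question with a haiku.",
    init_phrase="Hello! Welcome to the Haiku hotline, how can I help?",
)

def run_chat(sess):
    # Wait for the Twilio media stream to connect before starting the conversation.
    agent_b = TwilioCaller(sess, thinking_phrase="One moment.")
    while not agent_b.session.media_stream_connected():
        time.sleep(0.1)
    run_conversation(agent_a, agent_b)

tws.on_session = run_chat
```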
27 changes: 27 additions & 0 deletions examples/keyboard_chat_with_gpt.py
@@ -0,0 +1,27 @@
from gevent import monkey

monkey.patch_all()

import logging
import argparse
import tempfile
import os
from llm_convo.agents import OpenAIChat, TerminalInPrintOut
from llm_convo.conversation import run_conversation


def main(model):
agent_a = OpenAIChat(
system_prompt="You are a machine learning assistant. Answer the users questions about machine learning with short rhymes. Ask follow up questions when needed to help clarify their question.",
init_phrase="Hello! Welcome to the Machine Learning hotline, how can I help?",
model=model,
)
agent_b = TerminalInPrintOut()
run_conversation(agent_a, agent_b)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, default="gpt-3.5-turbo")
args = parser.parse_args()
main(args.model)
54 changes: 54 additions & 0 deletions examples/twilio_ngrok_ml_rhyme_hotline.py
@@ -0,0 +1,54 @@
from gevent import monkey

monkey.patch_all()

import logging
import argparse
import tempfile
import os
import time
from llm_convo.agents import OpenAIChat, TwilioCaller
from llm_convo.audio_input import get_whisper_model
from llm_convo.twilio_io import TwilioServer
from llm_convo.conversation import run_conversation
from pyngrok import ngrok


def main(port, remote_host, start_ngrok):
if start_ngrok:
ngrok_http = ngrok.connect(port)
remote_host = ngrok_http.public_url.split("//")[1]

static_dir = os.path.join(tempfile.gettempdir(), "twilio_static")
os.makedirs(static_dir, exist_ok=True)

logging.info(f"Starting server at {remote_host} from local:{port}, serving static content from {static_dir}")
logging.info(f"Set call webhook to https://{remote_host}/incoming-voice")

tws = TwilioServer(remote_host=remote_host, port=port, static_dir=static_dir)
tws.start()
agent_a = OpenAIChat(
system_prompt="You are a machine learning assistant. Answer the users questions about machine learning with short rhymes. Ask follow up questions when needed to help clarify their question.",
init_phrase="Hello! Welcome to the Machine Learning hotline, how can I help?",
)

def run_chat(sess):
agent_b = TwilioCaller(sess, thinking_phrase="One moment.")
while not agent_b.session.media_stream_connected():
time.sleep(0.1)
run_conversation(agent_a, agent_b)

tws.on_session = run_chat


if __name__ == "__main__":
logging.getLogger().setLevel(logging.INFO)
parser = argparse.ArgumentParser()
parser.add_argument("--preload_whisper", action="store_true")
parser.add_argument("--start_ngrok", action="store_true")
parser.add_argument("--port", type=int, default=8080)
parser.add_argument("--remote_host", type=str, default="localhost")
args = parser.parse_args()
if args.preload_whisper:
get_whisper_model()
main(args.port, args.remote_host, args.start_ngrok)
72 changes: 72 additions & 0 deletions examples/twilio_ngrok_pizza_order.py
@@ -0,0 +1,72 @@
from gevent import monkey

monkey.patch_all()

import logging
import argparse
import tempfile
import os
import time
import sys
from llm_convo.agents import OpenAIChat, TwilioCaller
from llm_convo.audio_input import get_whisper_model
from llm_convo.twilio_io import TwilioServer
from llm_convo.conversation import run_conversation
from pyngrok import ngrok


def main(port, remote_host, start_ngrok, phone_number):
if start_ngrok:
ngrok_http = ngrok.connect(port)
remote_host = ngrok_http.public_url.split("//")[1]

static_dir = os.path.join(tempfile.gettempdir(), "twilio_static")
os.makedirs(static_dir, exist_ok=True)

logging.info(
f"Starting server at {remote_host} from local:{port}, serving static content from {static_dir}, will call {phone_number}"
)
logging.info(f"Set call webhook to https://{remote_host}/incoming-voice")

input(" >>> Press enter to start the call after ensuring the webhook is set. <<< ")

tws = TwilioServer(remote_host=remote_host, port=port, static_dir=static_dir)
tws.start()
agent_a = OpenAIChat(
system_prompt="""
You are an ordering bot that is going to call a pizza place an order a pizza.
When you need to say numbers space them out (e.g. 1 2 3) and do not respond with abbreviations.
If they ask for information not known, make something up that's reasonable.
The customer's details are:
* Address: 1234 Candyland Road, Apt 506
* Credit Card: 1234 5555 8888 9999 (CVV: 010)
* Name: Bob Joe
* Order: 1 large pizza with only pepperoni
""",
init_phrase="Hi, I would like to order a pizza.",
)

def run_chat(sess):
agent_b = TwilioCaller(sess, thinking_phrase="One moment.")
while not agent_b.session.media_stream_connected():
time.sleep(0.1)
run_conversation(agent_a, agent_b)
sys.exit(0)

tws.on_session = run_chat
tws.start_call(phone_number)


if __name__ == "__main__":
logging.getLogger().setLevel(logging.INFO)
parser = argparse.ArgumentParser()
parser.add_argument("--phone_number", type=str)
parser.add_argument("--preload_whisper", action="store_true")
parser.add_argument("--start_ngrok", action="store_true")
parser.add_argument("--port", type=int, default=8080)
parser.add_argument("--remote_host", type=str, default="localhost")
args = parser.parse_args()
if args.preload_whisper:
get_whisper_model()
main(args.port, args.remote_host, args.start_ngrok, args.phone_number)
12 changes: 6 additions & 6 deletions llm_convo/agents.py
@@ -1,10 +1,10 @@
from typing import List, Optional
from abc import ABC, abstractmethod

from convo.audio_input import WhisperMicrophone
from convo.audio_output import TTSClient, GoogleTTS
from convo.openai_io import OpenAIChatCompletion
from convo.twilio_io import TwilioCallSession
from llm_convo.audio_input import WhisperMicrophone
from llm_convo.audio_output import TTSClient, GoogleTTS
from llm_convo.openai_io import OpenAIChatCompletion
from llm_convo.twilio_io import TwilioCallSession


class ChatAgent(ABC):
@@ -35,8 +35,8 @@ def get_response(self, transcript: List[str]) -> str:


class OpenAIChat(ChatAgent):
def __init__(self, system_prompt: str, init_phrase: Optional[str] = None):
self.openai_chat = OpenAIChatCompletion(system_prompt=system_prompt)
def __init__(self, system_prompt: str, init_phrase: Optional[str] = None, model: Optional[str] = None):
self.openai_chat = OpenAIChatCompletion(system_prompt=system_prompt, model=model)
self.init_phrase = init_phrase

def get_response(self, transcript: List[str]) -> str:
2 changes: 1 addition & 1 deletion llm_convo/conversation.py
@@ -1,4 +1,4 @@
from convo.agents import ChatAgent
from llm_convo.agents import ChatAgent


def run_conversation(agent_a: ChatAgent, agent_b: ChatAgent):
9 changes: 5 additions & 4 deletions llm_convo/openai_io.py
@@ -1,13 +1,14 @@
from typing import List
from typing import List, Optional
import os
import openai

openai.api_key = os.environ["OPENAI_KEY"]
openai.api_key = os.environ["OPENAI_API_KEY"]


class OpenAIChatCompletion:
def __init__(self, system_prompt: str):
def __init__(self, system_prompt: str, model: Optional[str] = None):
self.system_prompt = system_prompt
self.model = model

def get_response(self, transcript: List[str]) -> str:
messages = [
@@ -16,7 +17,7 @@ def get_response(self, transcript: List[str]) -> str:
for i, text in enumerate(reversed(transcript)):
messages.insert(1, {"role": "user" if i % 2 == 0 else "assistant", "content": text})
output = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
model="gpt-3.5-turbo" if self.model is None else self.model,
messages=messages,
)
return output["choices"][0]["message"]["content"]
2 changes: 1 addition & 1 deletion llm_convo/twilio_io.py
@@ -12,7 +12,7 @@
import simple_websocket
import audioop

from convo.audio_input import WhisperTwilioStream
from llm_convo.audio_input import WhisperTwilioStream


XML_MEDIA_STREAM = """
23 changes: 11 additions & 12 deletions requirements.txt
@@ -1,12 +1,11 @@
black==23.1.0

gTTS
openai

git+https://github.com/openai/whisper.git
SpeechRecognition
pyaudio
pydub
--extra-index-url https://download.pytorch.org/whl/cu116
torch
torchaudio
black~=23.1
gTTS~=2.3
openai~=0.28
SpeechRecognition~=3.10
pyaudio~=0.2
pydub~=0.25
pyngrok~=6.1.0
gevent~=23.9.1
twilio~=8.8.0
flask~=2.3
flask_sock~=0.6
4 changes: 2 additions & 2 deletions setup.py
@@ -5,10 +5,10 @@


setup(
name="llm_convo",
name="llm_llm_convo",
version="0.0.0",
description="",
url="https://github.com/sshh12/llm_convo",
url="https://github.com/sshh12/llm_llm_convo",
author="Shrivu Shankar",
license="MIT",
packages=find_packages(),
