feat: add experiment launcher code to quickly switch between gpt and mixtral
instadeepai · May 22, 2024 · 05ac846 · 05ac846
1 parent 700a752
commit 05ac846
Show file tree

Hide file tree

Showing 10 changed files with 125 additions and 106 deletions.
diff --git a/experiments/conf/system/chateval.yaml b/experiments/conf/system/chateval.yaml
@@ -13,13 +13,13 @@ agreement_intensity: -1 # -1 for default behavior, otherwise {0, ..., 10} for ag
 agents: # options: [gpt, palm]
   # Agent 1
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.cot}"
   # Agent 2
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.cot}"
   # Summarizer
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.summarizer}"
diff --git a/experiments/conf/system/google_mad.yaml b/experiments/conf/system/google_mad.yaml
@@ -14,18 +14,18 @@ agreement_intensity: -1 # -1 for default behavior, otherwise {0, ..., 10} for ag
 agents: # options: [gpt, palm]
   # GPT-3.5 agent
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.simple}"
     - few_shot_examples: ${system.medpalm_examples.few_shot} # None, ${system.medpalm_examples.few_shot} or ${system.medpalm_examples.cot_few_shot}
 
   # GPT-3.5 agent
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.simple}"
     - few_shot_examples: ${system.medpalm_examples.few_shot} # None, ${system.medpalm_examples.few_shot} or ${system.medpalm_examples.cot_few_shot}
 
   # GPT-3.5 agent
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.simple}"
     - few_shot_examples: ${system.medpalm_examples.few_shot} # None, ${system.medpalm_examples.few_shot} or ${system.medpalm_examples.cot_few_shot}
diff --git a/experiments/conf/system/gpt.yaml b/experiments/conf/system/gpt.yaml
@@ -17,14 +17,12 @@ defaults: # options: [simple, cot, letter, explain...]: any prompt used in 'agen
 gpt:
   _target_: debatellm.agents.GPT
   prompt: ${system.agent_prompts.simple}
-  engine: "mixtral-8x7b-instruct"
+  engine: "gpt-3.5-turbo-0613"
   few_shot_examples: False # Options include: [False, ${system.medpalm_examples.few_shot}, ${system.medpalm_examples.cot_few_shot}]
   mock: False
   sampling:
     max_tokens: 1000
     temperature: 0.5 # Taken from here: https://community.openai.com/t/cheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api-a-few-tips-and-tricks-on-controlling-the-creativity-deterministic-output-of-prompt-responses/172683
     top_p: 0.5
-  # cost_per_prompt_token: 0.001 # 0.03  # dollar costs per 1000 prompt token
-  # cost_per_response_token:  0.002 # 0.06  # dollar costs per 1000 response token
-  cost_per_prompt_token: 0.0006 # 0.6  # dollar costs per million prompt token
-  cost_per_response_token:  0.0006 # 0.6  # dollar costs per million response token
+  cost_per_prompt_token: 0.001 # 0.03  # dollar costs per 1000 prompt token
+  cost_per_response_token:  0.002 # 0.06  # dollar costs per 1000 response token
diff --git a/experiments/conf/system/medprompt.yaml b/experiments/conf/system/medprompt.yaml
@@ -13,7 +13,7 @@ name: medprompt
 agents: # options: [gpt, palm]
   # GPT-3 agent
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-4 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - cost_per_prompt_token: 0.03  # dollar costs per 1000 prompt token
     - cost_per_response_token: 0.06  # dollar costs per 1000 response token
     - prompt: "${system.agent_prompts.cot_medprompt}"

diff --git a/experiments/conf/system/multi_agent_debate.yaml b/experiments/conf/system/multi_agent_debate.yaml
@@ -11,9 +11,9 @@ num_rounds: 2
 agents: # options: [gpt, palm]
   # GPT-3.5 agent
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.cot}"
   # GPT-3.5 agent
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.cot}"
diff --git a/experiments/conf/system/single_agent.yaml b/experiments/conf/system/single_agent.yaml
@@ -10,18 +10,18 @@ name: single_agent # Used for distinguishing between spp_synergy.
 agents: # options: [gpt, palm]
 
   # GPT-3.5 agent
-  # - - "${system.gpt}"
-  #   - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
-  #   - prompt: "${system.agent_prompts.simple}"
+  - - "${system.gpt}"
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
+    - prompt: "${system.agent_prompts.simple}"
 
   # PaLM agent
   # - - "${system.palm}" # palm uses default setup
   #   - prompt: "${system.agent_prompts.simple}"
   #   - engine: "text-bison@001"
 
     # Mixtral agent
-  - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # mixtral 8x7b instruct engine
-    - prompt: "${system.agent_prompts.simple}"
-    - cost_per_prompt_token: 0.0006 # 0.6  # dollar costs per million prompt token
-    - cost_per_response_token:  0.0006 # 0.6  # dollar costs per million response token
+  # - - "${system.gpt}"
+  #   - engine: "mixtral-8x7b-instruct" # mixtral 8x7b instruct engine
+  #   - prompt: "${system.agent_prompts.simple}"
+  #   - cost_per_prompt_token: 0.0006 # 0.6  # dollar costs per million prompt token
+  #   - cost_per_response_token:  0.0006 # 0.6  # dollar costs per million response token
diff --git a/experiments/conf/system/spp_synergy.yaml b/experiments/conf/system/spp_synergy.yaml
@@ -10,5 +10,5 @@ name: spp_synergy # Used for distinguishing between single agent GPT.
 agents: # options: [gpt, palm]
   # Agent 1
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.spp_original}" # options: [spp_expert, spp_original]
diff --git a/experiments/conf/system/tsinghua_mad.yaml b/experiments/conf/system/tsinghua_mad.yaml
@@ -14,21 +14,21 @@ agreement_intensity: -1 # -1: fallback to default prompt. [0, 1, ..., 10]: agree
 agents: # options: [gpt, palm]
   # Agent 1
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.angel}"
     - prompt_from_history: "tsinghua_mad"
     - few_shot_examples: None # None, ${system.medpalm_examples.few_shot} or ${system.medpalm_examples.cot_few_shot}
 
   # Agent 2
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.devil}"
     - prompt_from_history: "tsinghua_mad"
     - few_shot_examples: None # None, ${system.medpalm_examples.few_shot} or ${system.medpalm_examples.cot_few_shot}
 
   # Judge
   - - "${system.gpt}"
-    - engine: "mixtral-8x7b-instruct" # gpt uses gpt-3.5 engine
+    - engine: "gpt-3.5-turbo-0613" # gpt uses gpt-3.5 engine
     - prompt: "${system.agent_prompts.judge_tsinghua}"
     - prompt_from_history: "tsinghua_judge"
     - few_shot_examples: None # None, ${system.medpalm_examples.few_shot} or ${system.medpalm_examples.cot_few_shot}

diff --git a/scripts/experiments_utils.py b/scripts/experiments_utils.py
@@ -43,7 +43,7 @@ def encode_config(config):
 
 def gen_agent_config(
     num_agents=1,
-    use_gpt=True,
+    agent_type="gpt",
     prompt="cot",
     is_es=False,
     use_few_shot_examples=None,
@@ -52,7 +52,7 @@ def gen_agent_config(
 
     settings = {
         "num_agents": num_agents,
-        "use_gpt": use_gpt,
+        "agent": agent_type,
         "prompt": prompt,
         "use_few_shot_examples": use_few_shot_examples,
     }
@@ -65,10 +65,29 @@ def gen_agent_config(
     exps = generate_combinations(settings)
 
     encodings = []
+    agents_dict = {
+        "gpt": "${system.gpt}",
+        "palm": "${system.palm}",
+        "mixtral": "${system.gpt}",
+    }
+
+    engine_dict = {
+        "gpt": "gpt-3.5-turbo-0613",
+        "palm": "text-bison@001",
+        "mixtral": "mixtral-8x7b-instruct",
+    }
+
+    cost = {
+        "gpt": {"prompt": 0.001, "response": 0.002},
+        "palm": {"prompt": 0.0005, "response": 0.0005},
+        "mixtral": {"prompt": 0.0006, "response": 0.0006},
+    }
+
     for exp in exps:
         prompt = "${system.agent_prompts." + exp["prompt"] + "}"
-        agent = "${system.gpt}" if exp["use_gpt"] else "${system.palm}"
-        base_agent = [agent, {"prompt": prompt}]
+        base_agent = [agents_dict[agent_type], {"prompt": prompt}, {"engine": engine_dict[agent_type]},
+                        {"cost_per_prompt_token": cost[agent_type]["prompt"]},
+                        {"cost_per_response_token": cost[agent_type]["response"]}]
 
         if is_es:
             base_agent.append({"sampling": {"temperature": 0.7, "top_p": 0.5}})