diff --git a/engine/baml-lib/llm-client/src/clients/helpers.rs b/engine/baml-lib/llm-client/src/clients/helpers.rs
index 3dd5868e8..610d9fe3c 100644
--- a/engine/baml-lib/llm-client/src/clients/helpers.rs
+++ b/engine/baml-lib/llm-client/src/clients/helpers.rs
@@ -326,6 +326,28 @@ impl PropertyHandler {
         UnresolvedAllowedRoleMetadata::None
     }

+    pub fn ensure_query_params(&mut self) -> Option<IndexMap<String, StringOr>> {
+        self.ensure_map("query_params", false).map(|(_, value, _)| {
+            value
+                .into_iter()
+                .filter_map(|(k, (_, v))| match v.as_str() {
+                    Some(s) => Some((k, s.clone())),
+                    None => {
+                        self.push_error(
+                            format!(
+                                "Query param key {} must have a string value. Got: {}",
+                                k,
+                                v.r#type()
+                            ),
+                            v.meta().clone(),
+                        );
+                        None
+                    }
+                })
+                .collect()
+        })
+    }
+
     pub fn ensure_headers(&mut self) -> Option<IndexMap<String, StringOr>> {
         self.ensure_map("headers", false).map(|(_, value, _)| {
             value
diff --git a/engine/baml-lib/llm-client/src/clients/openai.rs b/engine/baml-lib/llm-client/src/clients/openai.rs
index d427a9ef7..ed6584605 100644
--- a/engine/baml-lib/llm-client/src/clients/openai.rs
+++ b/engine/baml-lib/llm-client/src/clients/openai.rs
@@ -289,10 +289,17 @@ impl UnresolvedOpenAI {
             .map(|v| v.clone())
             .unwrap_or_else(|| StringOr::EnvVar("AZURE_OPENAI_API_KEY".to_string()));

-        let mut query_params = IndexMap::new();
-        if let Some((_, v, _)) = properties.ensure_string("api_version", false) {
-            query_params.insert("api-version".to_string(), v.clone());
-        }
+        let query_params = match properties.ensure_query_params() {
+            Some(query_params) => query_params,
+            None => {
+                // you can override the query params by providing a query_params field in the client spec
+                let mut query_params = IndexMap::new();
+                if let Some((_, v, _)) = properties.ensure_string("api_version", false) {
+                    query_params.insert("api-version".to_string(), v.clone());
+                }
+                query_params
+            }
+        };

         let mut instance = Self::create_common(properties, base_url, None)?;
         instance.query_params = query_params;
@@ -342,6 +349,7 @@
         let supported_request_modes = properties.ensure_supported_request_modes();
         let headers = properties.ensure_headers().unwrap_or_default();
         let finish_reason_filter = properties.ensure_finish_reason_filter();
+        let query_params = properties.ensure_query_params().unwrap_or_default();

         let (properties, errors) = properties.finalize();
         if !errors.is_empty() {
@@ -356,7 +364,7 @@
             supported_request_modes,
             headers,
             properties,
-            query_params: IndexMap::new(),
+            query_params,
             finish_reason_filter,
         })
     }
diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs
index f335984a4..53c8a6a14 100644
--- a/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs
+++ b/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs
@@ -86,76 +86,6 @@ impl WithClient for OpenAIClient {
 }

 impl WithNoCompletion for OpenAIClient {}
-// TODO: Enable completion with support for completion streams
-// impl WithCompletion for OpenAIClient {
-//     fn completion_options(
-//         &self,
-//         ctx: &RuntimeContext,
-//     ) -> Result {
-//         return Ok(internal_baml_jinja::CompletionOptions::new("\n".into()));
-//     }
-
-//     async fn completion(&self, ctx: &RuntimeContext, prompt: &String) -> LLMResponse {
-//         let (response, system_start, instant_start) =
-//             match make_parsed_request::(
-//                 self,
-//                 either::Either::Left(prompt),
-//                 false,
-//             )
-//             .await
-//             {
-//                 Ok(v) => v,
-//                 Err(e) => return e,
-// }; - -// if response.choices.len() != 1 { -// return LLMResponse::LLMFailure(LLMErrorResponse { -// client: self.context.name.to_string(), -// model: None, -// prompt: internal_baml_jinja::RenderedPrompt::Completion(prompt.clone()), -// start_time: system_start, -// latency: instant_start.elapsed(), -// request_options: self.properties.properties.clone(), -// message: format!( -// "Expected exactly one choices block, got {}", -// response.choices.len() -// ), -// code: ErrorCode::Other(200), -// }); -// } - -// let usage = response.usage.as_ref(); - -// LLMResponse::Success(LLMCompleteResponse { -// client: self.context.name.to_string(), -// prompt: internal_baml_jinja::RenderedPrompt::Completion(prompt.clone()), -// content: response.choices[0].text.clone(), -// start_time: system_start, -// latency: instant_start.elapsed(), -// model: response.model, -// request_options: self.properties.properties.clone(), -// metadata: LLMCompleteResponseMetadata { -// baml_is_complete: match response.choices.get(0) { -// Some(c) => match c.finish_reason { -// Some(FinishReason::Stop) => true, -// _ => false, -// }, -// None => false, -// }, -// finish_reason: match response.choices.get(0) { -// Some(c) => match c.finish_reason { -// Some(FinishReason::Stop) => Some(FinishReason::Stop.to_string()), -// _ => None, -// }, -// None => None, -// }, -// prompt_tokens: usage.map(|u| u.prompt_tokens), -// output_tokens: usage.map(|u| u.completion_tokens), -// total_tokens: usage.map(|u| u.total_tokens), -// }, -// }) -// } -// } impl WithChat for OpenAIClient { async fn chat(&self, _ctx: &RuntimeContext, prompt: &[RenderedChatMessage]) -> LLMResponse { diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/openai/types.rs b/engine/baml-runtime/src/internal/llm_client/primitive/openai/types.rs index 377ca6402..e8f125ea6 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/openai/types.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/openai/types.rs @@ -13,7 +13,7 @@ pub struct ChatCompletionGeneric { /// A list of chat completion choices. Can be more than one if `n` is greater than 1.s pub choices: Vec, /// The Unix timestamp (in seconds) of when the chat completion was created. - pub created: u32, + pub created: Option, /// The model used for the chat completion. pub model: String, /// This fingerprint represents the backend configuration that the model runs with. @@ -22,7 +22,7 @@ pub struct ChatCompletionGeneric { pub system_fingerprint: Option, /// The object type, which is `chat.completion` for non-streaming chat completion, `chat.completion.chunk` for streaming chat completion. - pub object: String, + pub object: Option, pub usage: Option, } diff --git a/fern/03-reference/baml/clients/providers/azure-ai-foundary.mdx b/fern/03-reference/baml/clients/providers/azure-ai-foundary.mdx new file mode 100644 index 000000000..060213b95 --- /dev/null +++ b/fern/03-reference/baml/clients/providers/azure-ai-foundary.mdx @@ -0,0 +1,20 @@ +--- +title: Azure AI Foundary +--- + +To use the Azure AI Foundary ([https://ai.azure.com](https://ai.azure.com)), you can leverage the [`openai-generic`](/docs/snippets/clients/providers/openai) provider. 
+ +**Example:** + +```baml BAML +client MyClient { + provider "openai-generic" + options { + base_url "https://RESOURCE_NAME.REGION.models.ai.azure.com" + api_key env.MY_API_KEY + } +} +``` + +See here to see how to get your API key and base url: +Azure AI Foundary diff --git a/fern/assets/languages/azure-ai-foundary.png b/fern/assets/languages/azure-ai-foundary.png new file mode 100644 index 000000000..9a06ec9db Binary files /dev/null and b/fern/assets/languages/azure-ai-foundary.png differ diff --git a/fern/docs.yml b/fern/docs.yml index 70bbbccd6..4cd472b72 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -462,6 +462,8 @@ navigation: path: 03-reference/baml/clients/providers/azure.mdx - page: "openai-generic" path: 03-reference/baml/clients/providers/openai-generic.mdx + - page: "openai-generic: Azure AI Foundary" + path: 03-reference/baml/clients/providers/azure-ai-foundary.mdx - page: "openai-generic: Groq" path: 03-reference/baml/clients/providers/groq.mdx - page: "openai-generic: Hugging Face" diff --git a/integ-tests/baml_src/test-files/models/deepseek-azure.baml b/integ-tests/baml_src/test-files/models/deepseek-azure.baml new file mode 100644 index 000000000..c83346a9c --- /dev/null +++ b/integ-tests/baml_src/test-files/models/deepseek-azure.baml @@ -0,0 +1,25 @@ +client DeepSeekAzure { + provider openai-generic + options { + base_url "https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com" + api_key env.DEEPSEEK_AZURE_API_KEY + max_tokens 10 + } +} + +function TellStory(story: string) -> string { + client DeepSeekAzure + prompt #" + You are a storyteller. Tell a story about the following: + {{ _.role("user") }} {{ story }} + "# +} + +test TellStory { + functions [TellStory] + args { + story #" + Once upon a time, there was a cat who loved to play with yarn. + "# + } +} diff --git a/integ-tests/gen-baml-client.py b/integ-tests/gen-baml-client.py index 3837e0be3..26ac548de 100644 --- a/integ-tests/gen-baml-client.py +++ b/integ-tests/gen-baml-client.py @@ -33,6 +33,7 @@ "python": FILE_PATH / "python" / "baml_client", "typescript": FILE_PATH / "typescript" / "baml_client", "ruby": FILE_PATH / "ruby" / "baml_client", + "react": FILE_PATH / "react" / "baml_client", "openapi": FILE_PATH / "openapi" / "baml_client", } @@ -41,6 +42,7 @@ class Spinner: """ A simple spinner to show progress while waiting for a command to finish. """ + def __init__(self, delay=0.1): self.delay = delay self.spinner = itertools.cycle(["-", "\\", "|", "/"]) @@ -215,9 +217,11 @@ def diff_snapshots(snapshot1: dict, snapshot2: dict) -> str: content1 = snapshot1.get(file, "").splitlines(keepends=True) content2 = snapshot2.get(file, "").splitlines(keepends=True) if content1 != content2: - diff = list(unified_diff(content1, content2, - fromfile=f"{file} (v1)", - tofile=f"{file} (v2)")) + diff = list( + unified_diff( + content1, content2, fromfile=f"{file} (v1)", tofile=f"{file} (v2)" + ) + ) if diff: diff_lines.append(f"--- Diff for {file} ---\n" + "".join(diff)) return "\n".join(diff_lines) @@ -228,27 +232,54 @@ def print_table(table_rows): Prints a summary table given the rows. 
Each row is a tuple: (Generator, Target, Iteration, Hash) """ - col1_width = max(len("Generator"), max((len(row[0]) for row in table_rows), default=0)) + col1_width = max( + len("Generator"), max((len(row[0]) for row in table_rows), default=0) + ) col2_width = max(len("Target"), max((len(row[1]) for row in table_rows), default=0)) - col3_width = max(len("Iteration"), max((len(str(row[2])) for row in table_rows), default=0)) + col3_width = max( + len("Iteration"), max((len(str(row[2])) for row in table_rows), default=0) + ) col4_width = max(len("Hash"), max((len(row[3]) for row in table_rows), default=0)) sep_line = ( - "+" + "-" * (col1_width + 2) + - "+" + "-" * (col2_width + 2) + - "+" + "-" * (col3_width + 2) + - "+" + "-" * (col4_width + 2) + "+" + "+" + + "-" * (col1_width + 2) + + "+" + + "-" * (col2_width + 2) + + "+" + + "-" * (col3_width + 2) + + "+" + + "-" * (col4_width + 2) + + "+" ) print(sep_line) - print("| {0:<{w1}} | {1:<{w2}} | {2:<{w3}} | {3:<{w4}} |".format( - "Generator", "Target", "Iteration", "Hash", - w1=col1_width, w2=col2_width, w3=col3_width, w4=col4_width)) + print( + "| {0:<{w1}} | {1:<{w2}} | {2:<{w3}} | {3:<{w4}} |".format( + "Generator", + "Target", + "Iteration", + "Hash", + w1=col1_width, + w2=col2_width, + w3=col3_width, + w4=col4_width, + ) + ) print(sep_line) for row in table_rows: - print("| {0:<{w1}} | {1:<{w2}} | {2:<{w3}} | {3:<{w4}} |".format( - row[0], row[1], row[2], row[3], - w1=col1_width, w2=col2_width, w3=col3_width, w4=col4_width)) + print( + "| {0:<{w1}} | {1:<{w2}} | {2:<{w3}} | {3:<{w4}} |".format( + row[0], + row[1], + row[2], + row[3], + w1=col1_width, + w2=col2_width, + w3=col3_width, + w4=col4_width, + ) + ) print(sep_line) @@ -260,19 +291,21 @@ def main(): "--iterations", type=int, default=3, - help="Number of iterations to run each codegen (default: 3)" + help="Number of iterations to run each codegen (default: 3)", ) parser.add_argument( "--verbose", action="store_true", - help="Show detailed output from codegen commands" + help="Show detailed output from codegen commands", ) parser.add_argument( "--only", type=str, default="", - help=("Comma-separated list of generator commands to run. " - "Options: python, typescript (default: both)") + help=( + "Comma-separated list of generator commands to run. " + "Options: python, typescript (default: both)" + ), ) args = parser.parse_args() @@ -292,7 +325,9 @@ def main(): selected = {lang.strip().lower() for lang in args.only.split(",")} generators_to_run = available_generators.intersection(selected) if not generators_to_run: - print(f"{BOLD}{RED}No valid generators selected. Available options: {', '.join(available_generators)}{RESET}") + print( + f"{BOLD}{RED}No valid generators selected. 
Available options: {', '.join(available_generators)}{RESET}" + ) sys.exit(1) else: generators_to_run = available_generators @@ -303,8 +338,12 @@ def main(): # Dictionaries to store per-run results: # results[generator][target] = list of hash strings (one per iteration) # snapshots[generator][target] = list of snapshots (each a dict mapping relative file path to content) - results = {gen: {target: [] for target in targets_to_check} for gen in generators_to_run} - snapshots = {gen: {target: [] for target in targets_to_check} for gen in generators_to_run} + results = { + gen: {target: [] for target in targets_to_check} for gen in generators_to_run + } + snapshots = { + gen: {target: [] for target in targets_to_check} for gen in generators_to_run + } table_rows = [] # List of tuples: (Generator, Target, Iteration, Hash) overall_fail = 0 @@ -329,7 +368,9 @@ def main(): try: generate_func(args.verbose) except subprocess.CalledProcessError as e: - print(f"{BOLD}{RED}Error during '{gen}' codegen on iteration {iteration}.{RESET}") + print( + f"{BOLD}{RED}Error during '{gen}' codegen on iteration {iteration}.{RESET}" + ) sys.exit(e.returncode) # For each target, record the hash and capture a snapshot. for target in targets_to_check: @@ -354,9 +395,13 @@ def main(): unique_versions[h] = snapshots[gen][target][idx] version_count = len(unique_versions) if version_count == 1: - print(f"{BOLD}{GREEN}✅ {target} codegen is stable for generator '{gen}': in {runs} runs, generated 1 version.{RESET}\n") + print( + f"{BOLD}{GREEN}✅ {target} codegen is stable for generator '{gen}': in {runs} runs, generated 1 version.{RESET}\n" + ) else: - print(f"{BOLD}{RED}❌ {target} codegen is unstable for generator '{gen}': in {runs} runs, generated {version_count} versions:{RESET}") + print( + f"{BOLD}{RED}❌ {target} codegen is unstable for generator '{gen}': in {runs} runs, generated {version_count} versions:{RESET}" + ) for h in unique_versions: print(f" Version hash: {h}") overall_fail += 1 @@ -367,12 +412,16 @@ def main(): continue diff_text = diff_snapshots(baseline_snapshot, snap) if diff_text: - print(f"{YELLOW}Diff between baseline version ({baseline_hash}) and version ({h}):{RESET}") + print( + f"{YELLOW}Diff between baseline version ({baseline_hash}) and version ({h}):{RESET}" + ) print(diff_text) print("") if overall_fail > 0: - print(f"{BOLD}{RED}Failed stability checks for {overall_fail} generator/target combination(s).{RESET}") + print( + f"{BOLD}{RED}Failed stability checks for {overall_fail} generator/target combination(s).{RESET}" + ) sys.exit(1) else: print(f"{BOLD}{GREEN}All codegen stability checks passed!{RESET}") diff --git a/integ-tests/openapi/baml_client/openapi.yaml b/integ-tests/openapi/baml_client/openapi.yaml index 821156fdc..1ed8a3c4d 100644 --- a/integ-tests/openapi/baml_client/openapi.yaml +++ b/integ-tests/openapi/baml_client/openapi.yaml @@ -1423,6 +1423,19 @@ paths: title: TakeRecAliasDepResponse $ref: '#/components/schemas/RecursiveAliasDependency' operationId: TakeRecAliasDep + /call/TellStory: + post: + requestBody: + $ref: '#/components/requestBodies/TellStory' + responses: + '200': + description: Successful operation + content: + application/json: + schema: + title: TellStoryResponse + type: string + operationId: TellStory /call/TestAnthropic: post: requestBody: @@ -3884,6 +3897,22 @@ components: required: - input additionalProperties: false + TellStory: + required: true + content: + application/json: + schema: + title: TellStoryRequest + type: object + properties: + story: + type: 
string + __baml_options__: + nullable: true + $ref: '#/components/schemas/BamlOptions' + required: + - story + additionalProperties: false TestAnthropic: required: true content: diff --git a/integ-tests/python/baml_client/async_client.py b/integ-tests/python/baml_client/async_client.py index 065636b45..0d4ca98ef 100644 --- a/integ-tests/python/baml_client/async_client.py +++ b/integ-tests/python/baml_client/async_client.py @@ -2327,6 +2327,29 @@ async def TakeRecAliasDep( ) return cast(types.RecursiveAliasDependency, raw.cast_to(types, types, partial_types, False)) + async def TellStory( + self, + story: str, + baml_options: BamlCallOptions = {}, + ) -> str: + __tb__ = baml_options.get("tb", None) + if __tb__ is not None: + tb = __tb__._tb # type: ignore (we know how to use this private attribute) + else: + tb = None + __cr__ = baml_options.get("client_registry", None) + + raw = await self.__runtime.call_function( + "TellStory", + { + "story": story, + }, + self.__ctx_manager.get(), + tb, + __cr__, + ) + return cast(str, raw.cast_to(types, types, partial_types, False)) + async def TestAnthropic( self, input: str, @@ -6694,6 +6717,36 @@ def TakeRecAliasDep( self.__ctx_manager.get(), ) + def TellStory( + self, + story: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlStream[Optional[str], str]: + __tb__ = baml_options.get("tb", None) + if __tb__ is not None: + tb = __tb__._tb # type: ignore (we know how to use this private attribute) + else: + tb = None + __cr__ = baml_options.get("client_registry", None) + + raw = self.__runtime.stream_function( + "TellStory", + { + "story": story, + }, + None, + self.__ctx_manager.get(), + tb, + __cr__, + ) + + return baml_py.BamlStream[Optional[str], str]( + raw, + lambda x: cast(Optional[str], x.cast_to(types, types, partial_types, True)), + lambda x: cast(str, x.cast_to(types, types, partial_types, False)), + self.__ctx_manager.get(), + ) + def TestAnthropic( self, input: str, diff --git a/integ-tests/python/baml_client/inlinedbaml.py b/integ-tests/python/baml_client/inlinedbaml.py index 7cd135509..7f0c08e7b 100644 --- a/integ-tests/python/baml_client/inlinedbaml.py +++ b/integ-tests/python/baml_client/inlinedbaml.py @@ -92,6 +92,7 @@ "test-files/functions/prompts/with-chat-messages.baml": "\nfunction PromptTestOpenAIChat(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestOpenAIChatNoSystem(input: string) -> string {\n client GPT35\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChat(input: string) -> string {\n client Claude\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChatNoSystem(input: string) -> string {\n client Claude\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n 
Tell me a haiku about {{ input }}\n \"#\n}\n\ntest TestSystemAndNonSystemChat1 {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"cats\"\n }\n}\n\ntest TestSystemAndNonSystemChat2 {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"lion\"\n }\n}", "test-files/functions/v2/basic.baml": "\n\nfunction ExtractResume2(resume: string) -> Resume {\n client GPT4\n prompt #\"\n {{ _.role('system') }}\n\n Extract the following information from the resume:\n\n Resume:\n <<<<\n {{ resume }}\n <<<<\n\n Output JSON schema:\n {{ ctx.output_format }}\n\n JSON:\n \"#\n}\n\n\nclass WithReasoning {\n value string\n reasoning string @description(#\"\n Why the value is a good fit.\n \"#)\n}\n\n\nclass SearchParams {\n dateRange int? @description(#\"\n In ISO duration format, e.g. P1Y2M10D.\n \"#)\n location string[]\n jobTitle WithReasoning? @description(#\"\n An exact job title, not a general category.\n \"#)\n company WithReasoning? @description(#\"\n The exact name of the company, not a product or service.\n \"#)\n description WithReasoning[] @description(#\"\n Any specific projects or features the user is looking for.\n \"#)\n tags (Tag | string)[]\n}\n\nenum Tag {\n Security\n AI\n Blockchain\n}\n\nfunction GetQuery(query: string) -> SearchParams {\n client GPT4\n prompt #\"\n Extract the following information from the query:\n\n Query:\n <<<<\n {{ query }}\n <<<<\n\n OUTPUT_JSON_SCHEMA:\n {{ ctx.output_format }}\n\n Before OUTPUT_JSON_SCHEMA, list 5 intentions the user may have.\n --- EXAMPLES ---\n 1. \n 2. \n 3. \n 4. \n 5. \n\n {\n ... 
// OUTPUT_JSON_SCHEMA\n }\n \"#\n}\n\nclass RaysData {\n dataType DataType\n value Resume | Event\n}\n\nenum DataType {\n Resume\n Event\n}\n\nclass Event {\n title string\n date string\n location string\n description string\n}\n\nfunction GetDataType(text: string) -> RaysData {\n client GPT4\n prompt #\"\n Extract the relevant info.\n\n Text:\n <<<<\n {{ text }}\n <<<<\n\n Output JSON schema:\n {{ ctx.output_format }}\n\n JSON:\n \"#\n}", "test-files/load-test/memory.baml": "\n\nclass MemoryObject {\n id string\n name string\n description string\n}\n\nclass ComplexMemoryObject {\n id string\n name string\n description string\n metadata (string | int | float)[] @description(#\"\n Additional metadata about the memory object, which can be a mix of types.\n \"#)\n}\n\nclass AnotherObject {\n id string\n thingy2 string\n thingy3 string\n}\n\nclass TestMemoryOutput {\n items (MemoryObject | ComplexMemoryObject | AnotherObject)[] @description(#\"\n Add 10 items, which can be either simple MemoryObjects or more complex MemoryObjects with metadata.\n \"#)\n more_items (MemoryObject | ComplexMemoryObject | AnotherObject)[] @description(#\"\n Add 3 more items, which can be either simple MemoryObjects or more complex MemoryObjects with metadata.\n \"#)\n}\n\n\nfunction TestMemory(input: string) -> TestMemoryOutput {\n client GPT35\n prompt #\"\n Return a json blob that matches the schema:\n {{ ctx.output_format }}\n \"#\n}\n\ntest TestName {\n functions [TestMemory]\n args {\n input #\"\n hello world\n \"#\n }\n}\n", + "test-files/models/deepseek-azure.baml": "client DeepSeekAzure {\n provider openai-generic\n options {\n base_url \"https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com\"\n api_key env.DEEPSEEK_AZURE_API_KEY\n max_tokens 10\n }\n}\n\nfunction TellStory(story: string) -> string {\n client DeepSeekAzure\n prompt #\"\n You are a storyteller. 
Tell a story about the following:\n {{ _.role(\"user\") }} {{ story }}\n \"#\n}\n\ntest TellStory {\n functions [TellStory]\n args {\n story #\"\n Once upon a time, there was a cat who loved to play with yarn.\n \"#\n }\n}\n", "test-files/providers/anthropic.baml": "function TestAnthropic(input: string) -> string {\n client Claude\n prompt #\"\n Write a nice haiku about {{ input }}\n \"#\n}\n\nfunction TestAnthropicShorthand(input: string) -> string {\n client \"anthropic/claude-3-haiku-20240307\"\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestCaching(input: string, not_cached: string) -> string {\n client ClaudeWithCaching\n prompt #\"\n {{ _.role('system', cache_control={\"type\": \"ephemeral\"}) }}\n Generate the following story\n {{ input }}\n\n {# Haiku require 2048 tokens to cache -#}\n {{ input }}\n\n {{ _.role('user') }}\n {{ not_cached }}\n \"#\n}", "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n client AwsBedrock\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n question string\n answer string\n}\n\nclass UniverseQuestionInput {\n question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n client AwsBedrock\n prompt #\"\n You are a helpful assistant that answers questions about the universe.\n\n {{ ctx.output_format }}\n\n {{ _.role(\"user\")}}\n\n Question: {{ question }}\n\n Answer:\n \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n client AwsBedrockInvalidRegion\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n client AwsBedrockInvalidAccessKey\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n client AwsBedrockInvalidProfile\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n client AwsBedrockInvalidSessionToken\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}", "test-files/providers/azure.baml": "// Test standard Azure GPT-3.5 (should add default max_tokens)\nfunction TestAzure(input: string) -> string {\n client GPT35Azure\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestAzureO1NoMaxTokens(input: string) -> string {\n client AzureO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should keep user value)\nfunction TestAzureO1WithMaxTokens(input: string) -> string {\n client AzureO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO1WithMaxCompletionTokens(input: string) -> string {\n client AzureO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-3.5 with explicit max_tokens (should keep user value)\nfunction TestAzureWithMaxTokens(input: string) -> string {\n client GPT35AzureWithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test failure case with invalid resource name\nfunction TestAzureFailure(input: string) -> string {\n client GPT35AzureFailed\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient AzureWithNullMaxTokens {\n provider azure-openai\n options {\n resource_name env.AZURE_OPENAI_RESOURCE_NAME\n deployment_id env.AZURE_OPENAI_DEPLOYMENT_ID\n api_version \"2024-02-01\"\n max_tokens null\n }\n}\n\n// Test O3 model without max_tokens (should not add default)\nfunction TestAzureO3NoMaxTokens(input: string) -> string {\n client AzureO3\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O3 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO3WithMaxCompletionTokens(input: string) -> string {\n client AzureO3WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestAzureClients {\n functions [\n TestAzure,\n TestAzureO1NoMaxTokens,\n TestAzureO1WithMaxTokens,\n TestAzureWithMaxTokens,\n TestAzureO1WithMaxCompletionTokens,\n TestAzureO3NoMaxTokens,\n TestAzureO3WithMaxCompletionTokens\n ]\n args {\n input \"Cherry blossoms\"\n }\n}\n\n// Test failure case separately\ntest TestAzureFailureCase {\n functions [TestAzureFailure]\n args {\n input \"Cherry blossoms\"\n }\n}", diff --git a/integ-tests/python/baml_client/sync_client.py b/integ-tests/python/baml_client/sync_client.py index 476cd7193..8705d7b94 100644 --- a/integ-tests/python/baml_client/sync_client.py +++ b/integ-tests/python/baml_client/sync_client.py @@ -2324,6 +2324,29 @@ def TakeRecAliasDep( ) return cast(types.RecursiveAliasDependency, raw.cast_to(types, types, partial_types, False)) + def TellStory( + self, + story: str, + baml_options: BamlCallOptions = {}, + ) -> str: + __tb__ = baml_options.get("tb", None) + if __tb__ is not None: + tb = __tb__._tb # type: ignore (we know how to use this private attribute) + else: + tb = None + __cr__ = baml_options.get("client_registry", None) + + raw = self.__runtime.call_function_sync( + "TellStory", + { + "story": story, + }, + self.__ctx_manager.get(), + tb, + __cr__, + ) + return cast(str, raw.cast_to(types, types, partial_types, False)) + def TestAnthropic( self, input: str, @@ -6692,6 +6715,36 @@ def TakeRecAliasDep( self.__ctx_manager.get(), ) + def TellStory( + self, + story: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlSyncStream[Optional[str], str]: + __tb__ = baml_options.get("tb", None) + if __tb__ is not None: + tb = __tb__._tb # type: ignore (we know how to use this private attribute) + else: + tb = None + __cr__ = baml_options.get("client_registry", None) + + raw = self.__runtime.stream_function_sync( + "TellStory", + { + "story": story, + }, + None, + self.__ctx_manager.get(), + tb, + __cr__, + ) + + return 
baml_py.BamlSyncStream[Optional[str], str]( + raw, + lambda x: cast(Optional[str], x.cast_to(types, types, partial_types, True)), + lambda x: cast(str, x.cast_to(types, types, partial_types, False)), + self.__ctx_manager.get(), + ) + def TestAnthropic( self, input: str, diff --git a/integ-tests/react/baml_client/async_client.ts b/integ-tests/react/baml_client/async_client.ts index 7fb741e9b..7c12d8e28 100644 --- a/integ-tests/react/baml_client/async_client.ts +++ b/integ-tests/react/baml_client/async_client.ts @@ -2024,6 +2024,26 @@ export class BamlAsyncClient { } } + async TellStory( + story: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } + ): Promise { + try { + const raw = await this.runtime.callFunction( + "TellStory", + { + "story": story + }, + this.ctx_manager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + ) + return raw.parsed(false) as string + } catch (error: any) { + throw toBamlError(error); + } + } + async TestAnthropic( input: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } @@ -5804,6 +5824,32 @@ class BamlStreamClient { } } + TellStory( + story: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } + ): BamlStream { + try { + const raw = this.runtime.streamFunction( + "TellStory", + { + "story": story + }, + undefined, + this.ctx_manager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + ) + return new BamlStream( + raw, + (a): string => a, + (a): string => a, + this.ctx_manager.cloneContext(), + ) + } catch (error) { + throw toBamlError(error); + } + } + TestAnthropic( input: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } diff --git a/integ-tests/react/baml_client/inlinedbaml.ts b/integ-tests/react/baml_client/inlinedbaml.ts index 634a135c4..b96af7f6e 100644 --- a/integ-tests/react/baml_client/inlinedbaml.ts +++ b/integ-tests/react/baml_client/inlinedbaml.ts @@ -93,6 +93,7 @@ const fileMap = { "test-files/functions/prompts/with-chat-messages.baml": "\nfunction PromptTestOpenAIChat(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestOpenAIChatNoSystem(input: string) -> string {\n client GPT35\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChat(input: string) -> string {\n client Claude\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChatNoSystem(input: string) -> string {\n client Claude\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\ntest TestSystemAndNonSystemChat1 {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, 
PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"cats\"\n }\n}\n\ntest TestSystemAndNonSystemChat2 {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"lion\"\n }\n}", "test-files/functions/v2/basic.baml": "\n\nfunction ExtractResume2(resume: string) -> Resume {\n client GPT4\n prompt #\"\n {{ _.role('system') }}\n\n Extract the following information from the resume:\n\n Resume:\n <<<<\n {{ resume }}\n <<<<\n\n Output JSON schema:\n {{ ctx.output_format }}\n\n JSON:\n \"#\n}\n\n\nclass WithReasoning {\n value string\n reasoning string @description(#\"\n Why the value is a good fit.\n \"#)\n}\n\n\nclass SearchParams {\n dateRange int? @description(#\"\n In ISO duration format, e.g. P1Y2M10D.\n \"#)\n location string[]\n jobTitle WithReasoning? @description(#\"\n An exact job title, not a general category.\n \"#)\n company WithReasoning? @description(#\"\n The exact name of the company, not a product or service.\n \"#)\n description WithReasoning[] @description(#\"\n Any specific projects or features the user is looking for.\n \"#)\n tags (Tag | string)[]\n}\n\nenum Tag {\n Security\n AI\n Blockchain\n}\n\nfunction GetQuery(query: string) -> SearchParams {\n client GPT4\n prompt #\"\n Extract the following information from the query:\n\n Query:\n <<<<\n {{ query }}\n <<<<\n\n OUTPUT_JSON_SCHEMA:\n {{ ctx.output_format }}\n\n Before OUTPUT_JSON_SCHEMA, list 5 intentions the user may have.\n --- EXAMPLES ---\n 1. \n 2. \n 3. \n 4. \n 5. \n\n {\n ... // OUTPUT_JSON_SCHEMA\n }\n \"#\n}\n\nclass RaysData {\n dataType DataType\n value Resume | Event\n}\n\nenum DataType {\n Resume\n Event\n}\n\nclass Event {\n title string\n date string\n location string\n description string\n}\n\nfunction GetDataType(text: string) -> RaysData {\n client GPT4\n prompt #\"\n Extract the relevant info.\n\n Text:\n <<<<\n {{ text }}\n <<<<\n\n Output JSON schema:\n {{ ctx.output_format }}\n\n JSON:\n \"#\n}", "test-files/load-test/memory.baml": "\n\nclass MemoryObject {\n id string\n name string\n description string\n}\n\nclass ComplexMemoryObject {\n id string\n name string\n description string\n metadata (string | int | float)[] @description(#\"\n Additional metadata about the memory object, which can be a mix of types.\n \"#)\n}\n\nclass AnotherObject {\n id string\n thingy2 string\n thingy3 string\n}\n\nclass TestMemoryOutput {\n items (MemoryObject | ComplexMemoryObject | AnotherObject)[] @description(#\"\n Add 10 items, which can be either simple MemoryObjects or more complex MemoryObjects with metadata.\n \"#)\n more_items (MemoryObject | ComplexMemoryObject | AnotherObject)[] @description(#\"\n Add 3 more items, which can be either simple MemoryObjects or more complex MemoryObjects with metadata.\n \"#)\n}\n\n\nfunction TestMemory(input: string) -> TestMemoryOutput {\n client GPT35\n prompt #\"\n Return a json blob that matches the schema:\n {{ ctx.output_format }}\n \"#\n}\n\ntest TestName {\n functions [TestMemory]\n args {\n input #\"\n hello world\n \"#\n }\n}\n", + "test-files/models/deepseek-azure.baml": "client DeepSeekAzure {\n provider openai-generic\n options {\n base_url \"https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com\"\n api_key env.DEEPSEEK_AZURE_API_KEY\n max_tokens 10\n }\n}\n\nfunction TellStory(story: string) -> string {\n client DeepSeekAzure\n prompt #\"\n You are a storyteller. 
Tell a story about the following:\n {{ _.role(\"user\") }} {{ story }}\n \"#\n}\n\ntest TellStory {\n functions [TellStory]\n args {\n story #\"\n Once upon a time, there was a cat who loved to play with yarn.\n \"#\n }\n}\n", "test-files/providers/anthropic.baml": "function TestAnthropic(input: string) -> string {\n client Claude\n prompt #\"\n Write a nice haiku about {{ input }}\n \"#\n}\n\nfunction TestAnthropicShorthand(input: string) -> string {\n client \"anthropic/claude-3-haiku-20240307\"\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestCaching(input: string, not_cached: string) -> string {\n client ClaudeWithCaching\n prompt #\"\n {{ _.role('system', cache_control={\"type\": \"ephemeral\"}) }}\n Generate the following story\n {{ input }}\n\n {# Haiku require 2048 tokens to cache -#}\n {{ input }}\n\n {{ _.role('user') }}\n {{ not_cached }}\n \"#\n}", "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n client AwsBedrock\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n question string\n answer string\n}\n\nclass UniverseQuestionInput {\n question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n client AwsBedrock\n prompt #\"\n You are a helpful assistant that answers questions about the universe.\n\n {{ ctx.output_format }}\n\n {{ _.role(\"user\")}}\n\n Question: {{ question }}\n\n Answer:\n \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n client AwsBedrockInvalidRegion\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n client AwsBedrockInvalidAccessKey\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n client AwsBedrockInvalidProfile\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n client AwsBedrockInvalidSessionToken\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}", "test-files/providers/azure.baml": "// Test standard Azure GPT-3.5 (should add default max_tokens)\nfunction TestAzure(input: string) -> string {\n client GPT35Azure\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestAzureO1NoMaxTokens(input: string) -> string {\n client AzureO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should keep user value)\nfunction TestAzureO1WithMaxTokens(input: string) -> string {\n client AzureO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO1WithMaxCompletionTokens(input: string) -> string {\n client AzureO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-3.5 with explicit max_tokens (should keep user value)\nfunction TestAzureWithMaxTokens(input: string) -> string {\n client GPT35AzureWithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test failure case with invalid resource name\nfunction TestAzureFailure(input: string) -> string {\n client GPT35AzureFailed\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient AzureWithNullMaxTokens {\n provider azure-openai\n options {\n resource_name env.AZURE_OPENAI_RESOURCE_NAME\n deployment_id env.AZURE_OPENAI_DEPLOYMENT_ID\n api_version \"2024-02-01\"\n max_tokens null\n }\n}\n\n// Test O3 model without max_tokens (should not add default)\nfunction TestAzureO3NoMaxTokens(input: string) -> string {\n client AzureO3\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O3 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO3WithMaxCompletionTokens(input: string) -> string {\n client AzureO3WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestAzureClients {\n functions [\n TestAzure,\n TestAzureO1NoMaxTokens,\n TestAzureO1WithMaxTokens,\n TestAzureWithMaxTokens,\n TestAzureO1WithMaxCompletionTokens,\n TestAzureO3NoMaxTokens,\n TestAzureO3WithMaxCompletionTokens\n ]\n args {\n input \"Cherry blossoms\"\n }\n}\n\n// Test failure case separately\ntest TestAzureFailureCase {\n functions [TestAzureFailure]\n args {\n input \"Cherry blossoms\"\n }\n}", diff --git a/integ-tests/react/baml_client/react/hooks.tsx b/integ-tests/react/baml_client/react/hooks.tsx index e99f8b049..80eae0655 100644 --- a/integ-tests/react/baml_client/react/hooks.tsx +++ b/integ-tests/react/baml_client/react/hooks.tsx @@ -5302,6 +5302,56 @@ export function useTakeRecAliasDep( } throw new Error('Invalid props') } +/** + * A specialized hook for the TellStory BAML function that supports both streaming and non‑streaming responses. + * + * **Input Types:** + * + * - story: string + * + * + * **Return Type:** + * - **Non‑streaming:** string + * - **Streaming Partial:** string + * - **Streaming Final:** string + * + * **Usage Patterns:** + * 1. **Non‑streaming (Default)** + * - Best for quick responses and simple UI updates. + * 2. **Streaming** + * - Ideal for long‑running operations or real‑time feedback. + * + * **Edge Cases:** + * - Ensure robust error handling via `onError`. + * - Handle cases where partial data may be incomplete or missing. 
+ * + * @example + * ```tsx + * // Basic non‑streaming usage: + * const { data, error, isLoading, mutate } = useTellStory({ stream: false}); + * + * // Streaming usage: + * const { data, streamData, isLoading, error, mutate } = useTellStory({ + * stream: true | undefined, + * onStreamData: (partial) => console.log('Partial update:', partial), + * onFinalData: (final) => console.log('Final result:', final), + * onError: (err) => console.error('Error:', err), + * }); + * ``` + */ +export function useTellStory(props: HookInput<'TellStory', { stream: false }>): HookOutput<'TellStory', { stream: false }> +export function useTellStory(props?: HookInput<'TellStory', { stream?: true }>): HookOutput<'TellStory', { stream: true }> +export function useTellStory( + props: HookInput<'TellStory', { stream?: boolean }> = {}, +): HookOutput<'TellStory', { stream: true }> | HookOutput<'TellStory', { stream: false }> { + if (isNotStreamingProps(props)) { + return useBamlAction(Actions.TellStory, props) + } + if (isStreamingProps(props)) { + return useBamlAction(StreamingActions.TellStory, props) + } + throw new Error('Invalid props') +} /** * A specialized hook for the TestAnthropic BAML function that supports both streaming and non‑streaming responses. * diff --git a/integ-tests/react/baml_client/react/server.ts b/integ-tests/react/baml_client/react/server.ts index 65e4482d5..e140abcbf 100644 --- a/integ-tests/react/baml_client/react/server.ts +++ b/integ-tests/react/baml_client/react/server.ts @@ -1829,6 +1829,24 @@ export const TakeRecAliasDep = async ( ); }; +/** + * Executes the "TellStory" BAML action. + * + * This server action calls the underlying BAML function "TellStory" + * with the specified parameters. + * + * @param { string } story - Input parameter. + * + * @returns {Promise} A promise that resolves with the result of the action. + */ +export const TellStory = async ( + story: string, +): Promise => { + return b.TellStory( + story, + ); +}; + /** * Executes the "TestAnthropic" BAML action. * diff --git a/integ-tests/react/baml_client/react/server_streaming.ts b/integ-tests/react/baml_client/react/server_streaming.ts index 2c729a6c9..ed491024f 100644 --- a/integ-tests/react/baml_client/react/server_streaming.ts +++ b/integ-tests/react/baml_client/react/server_streaming.ts @@ -1928,6 +1928,25 @@ export const TakeRecAliasDep = async ( return Promise.resolve(stream.toStreamable()); }; +/** + * Executes the streaming variant of the "TellStory" BAML action. + * + * This action initiates a streaming response by calling the corresponding + * BAML stream function. The returned stream yields incremental updates. + * + * @param { string } story - Input parameter. + * + * @returns {ReadableStream} A stream that yields incremental updates from the action. + */ +export const TellStory = async ( + story: string, +): Promise> => { + const stream = b.stream.TellStory( + story, + ); + return Promise.resolve(stream.toStreamable()); +}; + /** * Executes the streaming variant of the "TestAnthropic" BAML action. 
* diff --git a/integ-tests/react/baml_client/react/server_streaming_types.ts b/integ-tests/react/baml_client/react/server_streaming_types.ts index b9bb33c65..fcdde777a 100644 --- a/integ-tests/react/baml_client/react/server_streaming_types.ts +++ b/integ-tests/react/baml_client/react/server_streaming_types.ts @@ -123,6 +123,7 @@ export type StreamingServerTypes = { StreamUnionIntegers: ((number | null) | (string | null) | null)[], StreamingCompoundNumbers: partial_types.CompoundBigNumbers, TakeRecAliasDep: partial_types.RecursiveAliasDependency, + TellStory: string, TestAnthropic: string, TestAnthropicShorthand: string, TestAws: string, diff --git a/integ-tests/react/baml_client/sync_client.ts b/integ-tests/react/baml_client/sync_client.ts index bf5e3b639..8e8f7bd8b 100644 --- a/integ-tests/react/baml_client/sync_client.ts +++ b/integ-tests/react/baml_client/sync_client.ts @@ -2025,6 +2025,26 @@ export class BamlSyncClient { } } + TellStory( + story: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } + ): string { + try { + const raw = this.runtime.callFunctionSync( + "TellStory", + { + "story": story + }, + this.ctx_manager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + ) + return raw.parsed(false) as string + } catch (error: any) { + throw toBamlError(error); + } + } + TestAnthropic( input: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } diff --git a/integ-tests/ruby/baml_client/client.rb b/integ-tests/ruby/baml_client/client.rb index 7d720e387..7d0ad441f 100644 --- a/integ-tests/ruby/baml_client/client.rb +++ b/integ-tests/ruby/baml_client/client.rb @@ -3218,6 +3218,38 @@ def TakeRecAliasDep( (raw.parsed_using_types(Baml::Types, Baml::PartialTypes, false)) end + sig { + params( + varargs: T.untyped, + story: String, + baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry)] + ).returns(String) + } + def TellStory( + *varargs, + story:, + baml_options: {} + ) + if varargs.any? + + raise ArgumentError.new("TellStory may only be called with keyword arguments") + end + if (baml_options.keys - [:client_registry, :tb]).any? + raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb): #{baml_options.keys - [:client_registry, :tb]}") + end + + raw = @runtime.call_function( + "TellStory", + { + story: story, + }, + @ctx_manager, + baml_options[:tb]&.instance_variable_get(:@registry), + baml_options[:client_registry], + ) + (raw.parsed_using_types(Baml::Types, Baml::PartialTypes, false)) + end + sig { params( varargs: T.untyped, @@ -8616,6 +8648,41 @@ def TakeRecAliasDep( ) end + sig { + params( + varargs: T.untyped, + story: String, + baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry)] + ).returns(Baml::BamlStream[String]) + } + def TellStory( + *varargs, + story:, + baml_options: {} + ) + if varargs.any? + + raise ArgumentError.new("TellStory may only be called with keyword arguments") + end + if (baml_options.keys - [:client_registry, :tb]).any? 
+ raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb): #{baml_options.keys - [:client_registry, :tb]}") + end + + raw = @runtime.stream_function( + "TellStory", + { + story: story, + }, + @ctx_manager, + baml_options[:tb]&.instance_variable_get(:@registry), + baml_options[:client_registry], + ) + Baml::BamlStream[T.nilable(String), String].new( + ffi_stream: raw, + ctx_manager: @ctx_manager + ) + end + sig { params( varargs: T.untyped, diff --git a/integ-tests/ruby/baml_client/inlined.rb b/integ-tests/ruby/baml_client/inlined.rb index be0f195d2..f904bc5c9 100644 --- a/integ-tests/ruby/baml_client/inlined.rb +++ b/integ-tests/ruby/baml_client/inlined.rb @@ -92,6 +92,7 @@ module Inlined "test-files/functions/prompts/with-chat-messages.baml" => "\nfunction PromptTestOpenAIChat(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestOpenAIChatNoSystem(input: string) -> string {\n client GPT35\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChat(input: string) -> string {\n client Claude\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChatNoSystem(input: string) -> string {\n client Claude\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\ntest TestSystemAndNonSystemChat1 {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"cats\"\n }\n}\n\ntest TestSystemAndNonSystemChat2 {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"lion\"\n }\n}", "test-files/functions/v2/basic.baml" => "\n\nfunction ExtractResume2(resume: string) -> Resume {\n client GPT4\n prompt #\"\n {{ _.role('system') }}\n\n Extract the following information from the resume:\n\n Resume:\n <<<<\n {{ resume }}\n <<<<\n\n Output JSON schema:\n {{ ctx.output_format }}\n\n JSON:\n \"#\n}\n\n\nclass WithReasoning {\n value string\n reasoning string @description(#\"\n Why the value is a good fit.\n \"#)\n}\n\n\nclass SearchParams {\n dateRange int? @description(#\"\n In ISO duration format, e.g. P1Y2M10D.\n \"#)\n location string[]\n jobTitle WithReasoning? @description(#\"\n An exact job title, not a general category.\n \"#)\n company WithReasoning? 
@description(#\"\n The exact name of the company, not a product or service.\n \"#)\n description WithReasoning[] @description(#\"\n Any specific projects or features the user is looking for.\n \"#)\n tags (Tag | string)[]\n}\n\nenum Tag {\n Security\n AI\n Blockchain\n}\n\nfunction GetQuery(query: string) -> SearchParams {\n client GPT4\n prompt #\"\n Extract the following information from the query:\n\n Query:\n <<<<\n {{ query }}\n <<<<\n\n OUTPUT_JSON_SCHEMA:\n {{ ctx.output_format }}\n\n Before OUTPUT_JSON_SCHEMA, list 5 intentions the user may have.\n --- EXAMPLES ---\n 1. \n 2. \n 3. \n 4. \n 5. \n\n {\n ... // OUTPUT_JSON_SCHEMA\n }\n \"#\n}\n\nclass RaysData {\n dataType DataType\n value Resume | Event\n}\n\nenum DataType {\n Resume\n Event\n}\n\nclass Event {\n title string\n date string\n location string\n description string\n}\n\nfunction GetDataType(text: string) -> RaysData {\n client GPT4\n prompt #\"\n Extract the relevant info.\n\n Text:\n <<<<\n {{ text }}\n <<<<\n\n Output JSON schema:\n {{ ctx.output_format }}\n\n JSON:\n \"#\n}", "test-files/load-test/memory.baml" => "\n\nclass MemoryObject {\n id string\n name string\n description string\n}\n\nclass ComplexMemoryObject {\n id string\n name string\n description string\n metadata (string | int | float)[] @description(#\"\n Additional metadata about the memory object, which can be a mix of types.\n \"#)\n}\n\nclass AnotherObject {\n id string\n thingy2 string\n thingy3 string\n}\n\nclass TestMemoryOutput {\n items (MemoryObject | ComplexMemoryObject | AnotherObject)[] @description(#\"\n Add 10 items, which can be either simple MemoryObjects or more complex MemoryObjects with metadata.\n \"#)\n more_items (MemoryObject | ComplexMemoryObject | AnotherObject)[] @description(#\"\n Add 3 more items, which can be either simple MemoryObjects or more complex MemoryObjects with metadata.\n \"#)\n}\n\n\nfunction TestMemory(input: string) -> TestMemoryOutput {\n client GPT35\n prompt #\"\n Return a json blob that matches the schema:\n {{ ctx.output_format }}\n \"#\n}\n\ntest TestName {\n functions [TestMemory]\n args {\n input #\"\n hello world\n \"#\n }\n}\n", + "test-files/models/deepseek-azure.baml" => "client DeepSeekAzure {\n provider openai-generic\n options {\n base_url \"https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com\"\n api_key env.DEEPSEEK_AZURE_API_KEY\n max_tokens 10\n }\n}\n\nfunction TellStory(story: string) -> string {\n client DeepSeekAzure\n prompt #\"\n You are a storyteller. 
Tell a story about the following:\n {{ _.role(\"user\") }} {{ story }}\n \"#\n}\n\ntest TellStory {\n functions [TellStory]\n args {\n story #\"\n Once upon a time, there was a cat who loved to play with yarn.\n \"#\n }\n}\n", "test-files/providers/anthropic.baml" => "function TestAnthropic(input: string) -> string {\n client Claude\n prompt #\"\n Write a nice haiku about {{ input }}\n \"#\n}\n\nfunction TestAnthropicShorthand(input: string) -> string {\n client \"anthropic/claude-3-haiku-20240307\"\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestCaching(input: string, not_cached: string) -> string {\n client ClaudeWithCaching\n prompt #\"\n {{ _.role('system', cache_control={\"type\": \"ephemeral\"}) }}\n Generate the following story\n {{ input }}\n\n {# Haiku require 2048 tokens to cache -#}\n {{ input }}\n\n {{ _.role('user') }}\n {{ not_cached }}\n \"#\n}", "test-files/providers/aws.baml" => "function TestAws(input: string) -> string {\n client AwsBedrock\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n question string\n answer string\n}\n\nclass UniverseQuestionInput {\n question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n client AwsBedrock\n prompt #\"\n You are a helpful assistant that answers questions about the universe.\n\n {{ ctx.output_format }}\n\n {{ _.role(\"user\")}}\n\n Question: {{ question }}\n\n Answer:\n \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n client AwsBedrockInvalidRegion\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n client AwsBedrockInvalidAccessKey\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n client AwsBedrockInvalidProfile\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n client AwsBedrockInvalidSessionToken\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}", "test-files/providers/azure.baml" => "// Test standard Azure GPT-3.5 (should add default max_tokens)\nfunction TestAzure(input: string) -> string {\n client GPT35Azure\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestAzureO1NoMaxTokens(input: string) -> string {\n client AzureO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should keep user value)\nfunction TestAzureO1WithMaxTokens(input: string) -> string {\n client AzureO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO1WithMaxCompletionTokens(input: string) -> string {\n client AzureO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-3.5 with explicit max_tokens (should keep user value)\nfunction TestAzureWithMaxTokens(input: string) -> string {\n client GPT35AzureWithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test failure case with invalid resource name\nfunction TestAzureFailure(input: string) -> string {\n client GPT35AzureFailed\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient AzureWithNullMaxTokens {\n provider azure-openai\n options {\n resource_name env.AZURE_OPENAI_RESOURCE_NAME\n deployment_id env.AZURE_OPENAI_DEPLOYMENT_ID\n api_version \"2024-02-01\"\n max_tokens null\n }\n}\n\n// Test O3 model without max_tokens (should not add default)\nfunction TestAzureO3NoMaxTokens(input: string) -> string {\n client AzureO3\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O3 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO3WithMaxCompletionTokens(input: string) -> string {\n client AzureO3WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestAzureClients {\n functions [\n TestAzure,\n TestAzureO1NoMaxTokens,\n TestAzureO1WithMaxTokens,\n TestAzureWithMaxTokens,\n TestAzureO1WithMaxCompletionTokens,\n TestAzureO3NoMaxTokens,\n TestAzureO3WithMaxCompletionTokens\n ]\n args {\n input \"Cherry blossoms\"\n }\n}\n\n// Test failure case separately\ntest TestAzureFailureCase {\n functions [TestAzureFailure]\n args {\n input \"Cherry blossoms\"\n }\n}", diff --git a/integ-tests/typescript/baml_client/async_client.ts b/integ-tests/typescript/baml_client/async_client.ts index 7fb741e9b..7c12d8e28 100644 --- a/integ-tests/typescript/baml_client/async_client.ts +++ b/integ-tests/typescript/baml_client/async_client.ts @@ -2024,6 +2024,26 @@ export class BamlAsyncClient { } } + async TellStory( + story: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } + ): Promise { + try { + const raw = await this.runtime.callFunction( + "TellStory", + { + "story": story + }, + this.ctx_manager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + ) + return raw.parsed(false) as string + } catch (error: any) { + throw toBamlError(error); + } + } + async TestAnthropic( input: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } @@ -5804,6 +5824,32 @@ class BamlStreamClient { } } + TellStory( + story: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } + ): BamlStream { + try { + const raw = this.runtime.streamFunction( + "TellStory", + { + "story": story + }, + undefined, + this.ctx_manager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + ) + return new BamlStream( + raw, + (a): string => a, + (a): string => a, + this.ctx_manager.cloneContext(), + ) + } catch (error) { + throw toBamlError(error); + } + } + TestAnthropic( input: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } 
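The async_client.ts hunk above adds the generated bindings for the new TellStory function (one-shot and streaming). As an illustrative sketch only, not part of this patch, the following shows how those bindings might be invoked; it assumes the generated package exposes the usual `b` instance from "./baml_client", that BamlStream supports async iteration and getFinalResponse(), and that DEEPSEEK_AZURE_API_KEY is set in the environment.

// Hypothetical usage sketch for the newly generated TellStory bindings.
import { b } from "./baml_client"

async function demo(): Promise<void> {
  // One-shot call: resolves to the parsed string returned by the DeepSeekAzure client.
  const story = await b.TellStory("a cat who loved to play with yarn")
  console.log(story)

  // Streaming variant: partial strings are yielded as they arrive,
  // and getFinalResponse() resolves to the complete result.
  const stream = b.stream.TellStory("a cat who loved to play with yarn")
  for await (const partial of stream) {
    process.stdout.write(partial ?? "")
  }
  const finalStory = await stream.getFinalResponse()
  console.log("\nfinal:", finalStory)
}

demo()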
diff --git a/integ-tests/typescript/baml_client/inlinedbaml.ts b/integ-tests/typescript/baml_client/inlinedbaml.ts index 634a135c4..b96af7f6e 100644 --- a/integ-tests/typescript/baml_client/inlinedbaml.ts +++ b/integ-tests/typescript/baml_client/inlinedbaml.ts @@ -93,6 +93,7 @@ const fileMap = { "test-files/functions/prompts/with-chat-messages.baml": "\nfunction PromptTestOpenAIChat(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestOpenAIChatNoSystem(input: string) -> string {\n client GPT35\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChat(input: string) -> string {\n client Claude\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChatNoSystem(input: string) -> string {\n client Claude\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\ntest TestSystemAndNonSystemChat1 {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"cats\"\n }\n}\n\ntest TestSystemAndNonSystemChat2 {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"lion\"\n }\n}", "test-files/functions/v2/basic.baml": "\n\nfunction ExtractResume2(resume: string) -> Resume {\n client GPT4\n prompt #\"\n {{ _.role('system') }}\n\n Extract the following information from the resume:\n\n Resume:\n <<<<\n {{ resume }}\n <<<<\n\n Output JSON schema:\n {{ ctx.output_format }}\n\n JSON:\n \"#\n}\n\n\nclass WithReasoning {\n value string\n reasoning string @description(#\"\n Why the value is a good fit.\n \"#)\n}\n\n\nclass SearchParams {\n dateRange int? @description(#\"\n In ISO duration format, e.g. P1Y2M10D.\n \"#)\n location string[]\n jobTitle WithReasoning? @description(#\"\n An exact job title, not a general category.\n \"#)\n company WithReasoning? @description(#\"\n The exact name of the company, not a product or service.\n \"#)\n description WithReasoning[] @description(#\"\n Any specific projects or features the user is looking for.\n \"#)\n tags (Tag | string)[]\n}\n\nenum Tag {\n Security\n AI\n Blockchain\n}\n\nfunction GetQuery(query: string) -> SearchParams {\n client GPT4\n prompt #\"\n Extract the following information from the query:\n\n Query:\n <<<<\n {{ query }}\n <<<<\n\n OUTPUT_JSON_SCHEMA:\n {{ ctx.output_format }}\n\n Before OUTPUT_JSON_SCHEMA, list 5 intentions the user may have.\n --- EXAMPLES ---\n 1. \n 2. \n 3. \n 4. \n 5. \n\n {\n ... 
// OUTPUT_JSON_SCHEMA\n }\n \"#\n}\n\nclass RaysData {\n dataType DataType\n value Resume | Event\n}\n\nenum DataType {\n Resume\n Event\n}\n\nclass Event {\n title string\n date string\n location string\n description string\n}\n\nfunction GetDataType(text: string) -> RaysData {\n client GPT4\n prompt #\"\n Extract the relevant info.\n\n Text:\n <<<<\n {{ text }}\n <<<<\n\n Output JSON schema:\n {{ ctx.output_format }}\n\n JSON:\n \"#\n}", "test-files/load-test/memory.baml": "\n\nclass MemoryObject {\n id string\n name string\n description string\n}\n\nclass ComplexMemoryObject {\n id string\n name string\n description string\n metadata (string | int | float)[] @description(#\"\n Additional metadata about the memory object, which can be a mix of types.\n \"#)\n}\n\nclass AnotherObject {\n id string\n thingy2 string\n thingy3 string\n}\n\nclass TestMemoryOutput {\n items (MemoryObject | ComplexMemoryObject | AnotherObject)[] @description(#\"\n Add 10 items, which can be either simple MemoryObjects or more complex MemoryObjects with metadata.\n \"#)\n more_items (MemoryObject | ComplexMemoryObject | AnotherObject)[] @description(#\"\n Add 3 more items, which can be either simple MemoryObjects or more complex MemoryObjects with metadata.\n \"#)\n}\n\n\nfunction TestMemory(input: string) -> TestMemoryOutput {\n client GPT35\n prompt #\"\n Return a json blob that matches the schema:\n {{ ctx.output_format }}\n \"#\n}\n\ntest TestName {\n functions [TestMemory]\n args {\n input #\"\n hello world\n \"#\n }\n}\n", + "test-files/models/deepseek-azure.baml": "client DeepSeekAzure {\n provider openai-generic\n options {\n base_url \"https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com\"\n api_key env.DEEPSEEK_AZURE_API_KEY\n max_tokens 10\n }\n}\n\nfunction TellStory(story: string) -> string {\n client DeepSeekAzure\n prompt #\"\n You are a storyteller. 
Tell a story about the following:\n {{ _.role(\"user\") }} {{ story }}\n \"#\n}\n\ntest TellStory {\n functions [TellStory]\n args {\n story #\"\n Once upon a time, there was a cat who loved to play with yarn.\n \"#\n }\n}\n", "test-files/providers/anthropic.baml": "function TestAnthropic(input: string) -> string {\n client Claude\n prompt #\"\n Write a nice haiku about {{ input }}\n \"#\n}\n\nfunction TestAnthropicShorthand(input: string) -> string {\n client \"anthropic/claude-3-haiku-20240307\"\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestCaching(input: string, not_cached: string) -> string {\n client ClaudeWithCaching\n prompt #\"\n {{ _.role('system', cache_control={\"type\": \"ephemeral\"}) }}\n Generate the following story\n {{ input }}\n\n {# Haiku require 2048 tokens to cache -#}\n {{ input }}\n\n {{ _.role('user') }}\n {{ not_cached }}\n \"#\n}", "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n client AwsBedrock\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n question string\n answer string\n}\n\nclass UniverseQuestionInput {\n question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n client AwsBedrock\n prompt #\"\n You are a helpful assistant that answers questions about the universe.\n\n {{ ctx.output_format }}\n\n {{ _.role(\"user\")}}\n\n Question: {{ question }}\n\n Answer:\n \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n client AwsBedrockInvalidRegion\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n client AwsBedrockInvalidAccessKey\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n client AwsBedrockInvalidProfile\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n client AwsBedrockInvalidSessionToken\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}", "test-files/providers/azure.baml": "// Test standard Azure GPT-3.5 (should add default max_tokens)\nfunction TestAzure(input: string) -> string {\n client GPT35Azure\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestAzureO1NoMaxTokens(input: string) -> string {\n client AzureO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should keep user value)\nfunction TestAzureO1WithMaxTokens(input: string) -> string {\n client AzureO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO1WithMaxCompletionTokens(input: string) -> string {\n client AzureO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-3.5 with explicit max_tokens (should keep user value)\nfunction TestAzureWithMaxTokens(input: string) -> string {\n client GPT35AzureWithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test failure case with invalid resource name\nfunction TestAzureFailure(input: string) -> string {\n client GPT35AzureFailed\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient AzureWithNullMaxTokens {\n provider azure-openai\n options {\n resource_name env.AZURE_OPENAI_RESOURCE_NAME\n deployment_id env.AZURE_OPENAI_DEPLOYMENT_ID\n api_version \"2024-02-01\"\n max_tokens null\n }\n}\n\n// Test O3 model without max_tokens (should not add default)\nfunction TestAzureO3NoMaxTokens(input: string) -> string {\n client AzureO3\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O3 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO3WithMaxCompletionTokens(input: string) -> string {\n client AzureO3WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestAzureClients {\n functions [\n TestAzure,\n TestAzureO1NoMaxTokens,\n TestAzureO1WithMaxTokens,\n TestAzureWithMaxTokens,\n TestAzureO1WithMaxCompletionTokens,\n TestAzureO3NoMaxTokens,\n TestAzureO3WithMaxCompletionTokens\n ]\n args {\n input \"Cherry blossoms\"\n }\n}\n\n// Test failure case separately\ntest TestAzureFailureCase {\n functions [TestAzureFailure]\n args {\n input \"Cherry blossoms\"\n }\n}", diff --git a/integ-tests/typescript/baml_client/sync_client.ts b/integ-tests/typescript/baml_client/sync_client.ts index bf5e3b639..8e8f7bd8b 100644 --- a/integ-tests/typescript/baml_client/sync_client.ts +++ b/integ-tests/typescript/baml_client/sync_client.ts @@ -2025,6 +2025,26 @@ export class BamlSyncClient { } } + TellStory( + story: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry } + ): string { + try { + const raw = this.runtime.callFunctionSync( + "TellStory", + { + "story": story + }, + this.ctx_manager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + ) + return raw.parsed(false) as string + } catch (error: any) { + throw toBamlError(error); + } + } + TestAnthropic( input: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
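For completeness, the sync_client.ts hunk above adds the blocking counterpart. A minimal sketch, assuming the generated sync client is importable from "./baml_client/sync_client" and also exports a `b` instance (the export name and path may differ by generator version):

// Hypothetical synchronous usage of the generated TellStory binding.
import { b } from "./baml_client/sync_client"

// Blocks until the DeepSeekAzure client responds; the result is already parsed to a string.
const story: string = b.TellStory("a cat who loved to play with yarn")
console.log(story)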