
Adding support for Azure AI Foundry (#1469)


> [!IMPORTANT]
> Adds support for Azure AI Foundry by introducing new client functionality, updating documentation, and adding integration tests.
>
> - **Behavior**:
>   - Adds `ensure_query_params()` in `helpers.rs` to parse query parameters for Azure AI Foundry (see the sketch after this summary).
>   - Modifies `UnresolvedOpenAI` in `openai.rs` to use `ensure_query_params()` for query-parameter handling.
>   - Removes commented-out completion-stream code in `openai_client.rs`.
> - **Documentation**:
>   - Adds `azure-ai-foundary.mdx` with Azure AI Foundry usage instructions.
>   - Updates `docs.yml` to include the Azure AI Foundry documentation page.
> - **Testing**:
>   - Adds `deepseek-azure.baml` for integration testing against Azure AI Foundry.
>   - Updates `gen-baml-client.py` to include React in the list of targets.
>   - Modifies `openapi.yaml` to include a `TellStory` operation for the Azure AI Foundry tests.
>   - Updates the generated clients in `python`, `react`, `ruby`, and `typescript` to support the `TellStory` function.
> 


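For context, a minimal sketch of the new `query_params` field in a client spec. Assumptions: the map literal syntax mirrors the existing `headers` option, and `RESOURCE_NAME` plus the version value are placeholders, not taken from this commit:

```baml
client<llm> AzureWithExplicitVersion {
  provider azure-openai
  options {
    // Placeholders: substitute your own resource URL and key.
    base_url "https://RESOURCE_NAME.openai.azure.com"
    api_key env.AZURE_OPENAI_API_KEY
    // Overrides the default map that openai.rs builds from `api_version`.
    query_params {
      "api-version" "2024-06-01"
    }
  }
}
```

When `query_params` is omitted, the resolver falls back to deriving a single `api-version` entry from the `api_version` option, as the `openai.rs` diff below shows.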
hellovai authored Feb 17, 2025
1 parent 4a38455 commit 92e139a
Showing 25 changed files with 586 additions and 104 deletions.
22 changes: 22 additions & 0 deletions engine/baml-lib/llm-client/src/clients/helpers.rs
@@ -326,6 +326,28 @@ impl<Meta: Clone> PropertyHandler<Meta> {
UnresolvedAllowedRoleMetadata::None
}

pub fn ensure_query_params(&mut self) -> Option<IndexMap<String, StringOr>> {
self.ensure_map("query_params", false).map(|(_, value, _)| {
value
.into_iter()
.filter_map(|(k, (_, v))| match v.as_str() {
Some(s) => Some((k, s.clone())),
None => {
self.push_error(
format!(
"Query param key {} must have a string value. Got: {}",
k,
v.r#type()
),
v.meta().clone(),
);
None
}
})
.collect()
})
}

pub fn ensure_headers(&mut self) -> Option<IndexMap<String, StringOr>> {
self.ensure_map("headers", false).map(|(_, value, _)| {
value
18 changes: 13 additions & 5 deletions engine/baml-lib/llm-client/src/clients/openai.rs
@@ -289,10 +289,17 @@ impl<Meta: Clone> UnresolvedOpenAI<Meta> {
.map(|v| v.clone())
.unwrap_or_else(|| StringOr::EnvVar("AZURE_OPENAI_API_KEY".to_string()));

let mut query_params = IndexMap::new();
if let Some((_, v, _)) = properties.ensure_string("api_version", false) {
query_params.insert("api-version".to_string(), v.clone());
}
let query_params = match properties.ensure_query_params() {
Some(query_params) => query_params,
None => {
// No `query_params` override in the client spec: fall back to deriving
// the `api-version` query param from the `api_version` option.
let mut query_params = IndexMap::new();
if let Some((_, v, _)) = properties.ensure_string("api_version", false) {
query_params.insert("api-version".to_string(), v.clone());
}
query_params
}
};

let mut instance = Self::create_common(properties, base_url, None)?;
instance.query_params = query_params;
@@ -342,6 +349,7 @@ impl<Meta: Clone> UnresolvedOpenAI<Meta> {
let supported_request_modes = properties.ensure_supported_request_modes();
let headers = properties.ensure_headers().unwrap_or_default();
let finish_reason_filter = properties.ensure_finish_reason_filter();
let query_params = properties.ensure_query_params().unwrap_or_default();
let (properties, errors) = properties.finalize();

if !errors.is_empty() {
@@ -356,7 +364,7 @@ impl<Meta: Clone> UnresolvedOpenAI<Meta> {
supported_request_modes,
headers,
properties,
query_params: IndexMap::new(),
query_params,
finish_reason_filter,
})
}
@@ -86,76 +86,6 @@ impl WithClient for OpenAIClient {
}

impl WithNoCompletion for OpenAIClient {}
// TODO: Enable completion with support for completion streams
// impl WithCompletion for OpenAIClient {
// fn completion_options(
// &self,
// ctx: &RuntimeContext,
// ) -> Result<internal_baml_jinja::CompletionOptions> {
// return Ok(internal_baml_jinja::CompletionOptions::new("\n".into()));
// }

// async fn completion(&self, ctx: &RuntimeContext, prompt: &String) -> LLMResponse {
// let (response, system_start, instant_start) =
// match make_parsed_request::<CompletionResponse>(
// self,
// either::Either::Left(prompt),
// false,
// )
// .await
// {
// Ok(v) => v,
// Err(e) => return e,
// };

// if response.choices.len() != 1 {
// return LLMResponse::LLMFailure(LLMErrorResponse {
// client: self.context.name.to_string(),
// model: None,
// prompt: internal_baml_jinja::RenderedPrompt::Completion(prompt.clone()),
// start_time: system_start,
// latency: instant_start.elapsed(),
// request_options: self.properties.properties.clone(),
// message: format!(
// "Expected exactly one choices block, got {}",
// response.choices.len()
// ),
// code: ErrorCode::Other(200),
// });
// }

// let usage = response.usage.as_ref();

// LLMResponse::Success(LLMCompleteResponse {
// client: self.context.name.to_string(),
// prompt: internal_baml_jinja::RenderedPrompt::Completion(prompt.clone()),
// content: response.choices[0].text.clone(),
// start_time: system_start,
// latency: instant_start.elapsed(),
// model: response.model,
// request_options: self.properties.properties.clone(),
// metadata: LLMCompleteResponseMetadata {
// baml_is_complete: match response.choices.get(0) {
// Some(c) => match c.finish_reason {
// Some(FinishReason::Stop) => true,
// _ => false,
// },
// None => false,
// },
// finish_reason: match response.choices.get(0) {
// Some(c) => match c.finish_reason {
// Some(FinishReason::Stop) => Some(FinishReason::Stop.to_string()),
// _ => None,
// },
// None => None,
// },
// prompt_tokens: usage.map(|u| u.prompt_tokens),
// output_tokens: usage.map(|u| u.completion_tokens),
// total_tokens: usage.map(|u| u.total_tokens),
// },
// })
// }
// }

impl WithChat for OpenAIClient {
async fn chat(&self, _ctx: &RuntimeContext, prompt: &[RenderedChatMessage]) -> LLMResponse {
@@ -13,7 +13,7 @@ pub struct ChatCompletionGeneric<C> {
/// A list of chat completion choices. Can be more than one if `n` is greater than 1.
pub choices: Vec<C>,
/// The Unix timestamp (in seconds) of when the chat completion was created.
pub created: u32,
pub created: Option<u32>,
/// The model used for the chat completion.
pub model: String,
/// This fingerprint represents the backend configuration that the model runs with.
@@ -22,7 +22,7 @@ pub struct ChatCompletionGeneric<C> {
pub system_fingerprint: Option<String>,

/// The object type, which is `chat.completion` for non-streaming chat completion, `chat.completion.chunk` for streaming chat completion.
pub object: String,
pub object: Option<String>,
pub usage: Option<CompletionUsage>,
}

20 changes: 20 additions & 0 deletions fern/03-reference/baml/clients/providers/azure-ai-foundary.mdx
@@ -0,0 +1,20 @@
---
title: Azure AI Foundry
---

To use Azure AI Foundry ([https://ai.azure.com](https://ai.azure.com)), you can leverage the [`openai-generic`](/docs/snippets/clients/providers/openai) provider.

**Example:**

```baml BAML
client<llm> MyClient {
provider "openai-generic"
options {
base_url "https://RESOURCE_NAME.REGION.models.ai.azure.com"
api_key env.MY_API_KEY
}
}
```

See the screenshot below for how to find your API key and base URL:
<img src="/assets/languages/azure-ai-foundary.png" alt="Azure AI Foundry" />
Binary file added fern/assets/languages/azure-ai-foundary.png
2 changes: 2 additions & 0 deletions fern/docs.yml
@@ -462,6 +462,8 @@ navigation:
path: 03-reference/baml/clients/providers/azure.mdx
- page: "openai-generic"
path: 03-reference/baml/clients/providers/openai-generic.mdx
- page: "openai-generic: Azure AI Foundary"
path: 03-reference/baml/clients/providers/azure-ai-foundary.mdx
- page: "openai-generic: Groq"
path: 03-reference/baml/clients/providers/groq.mdx
- page: "openai-generic: Hugging Face"
25 changes: 25 additions & 0 deletions integ-tests/baml_src/test-files/models/deepseek-azure.baml
@@ -0,0 +1,25 @@
client<llm> DeepSeekAzure {
provider openai-generic
options {
base_url "https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com"
api_key env.DEEPSEEK_AZURE_API_KEY
max_tokens 10
}
}

function TellStory(story: string) -> string {
client DeepSeekAzure
prompt #"
You are a storyteller. Tell a story about the following:
{{ _.role("user") }} {{ story }}
"#
}

test TellStory {
functions [TellStory]
args {
story #"
Once upon a time, there was a cat who loved to play with yarn.
"#
}
}
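Since the resolver also reads `query_params` in the shared constructor (see the `openai.rs` diff above), the field should compose with `openai-generic` clients as well. A hypothetical variant of the test client above, for an endpoint that required an explicit API version (the version value is illustrative; the endpoint in the test needs none):

```baml
client<llm> DeepSeekAzureVersioned {
  provider openai-generic
  options {
    base_url "https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com"
    api_key env.DEEPSEEK_AZURE_API_KEY
    // Hypothetical: this Foundry endpoint does not require an api-version today.
    query_params {
      "api-version" "2024-05-01-preview"
    }
  }
}
```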
