
Updates for beta 9 release of azure-ai-inference SDK #39579

Merged
merged 11 commits on Feb 14, 2025
4 changes: 3 additions & 1 deletion .vscode/cspell.json
@@ -1369,7 +1369,9 @@
"wday",
"Hola",
"cómo",
"estás"
"estás",
"logprobs",
"conver",
]
},
{
15 changes: 15 additions & 0 deletions sdk/ai/azure-ai-inference/CHANGELOG.md
@@ -1,5 +1,20 @@
# Release History

## 1.0.0b9 (2025-02-14)

### Features Added

* Added support for chat completion messages with `developer` role.
* Updated package document with an example of how to set custom HTTP request headers,
and an example of providing chat completion "messages" as an array of Python `dict` objects.
* Added a descriptive Exception error message when the `load_client` function or
`get_model_info` method fails to run on an endpoint that does not support the `/info` route.

### Bugs Fixed

* Fixed an Exception raised, in some rare cases, while parsing a Chat Completions streaming response for
multibyte UTF-8 languages like Chinese ([GitHub Issue 39565](https://github.com/Azure/azure-sdk-for-python/issues/39565)).
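The root cause of that streaming fix can be illustrated in isolation: an SSE chunk boundary may fall in the middle of a multibyte UTF-8 character, so strict per-chunk decoding fails where an incremental decoder succeeds. A minimal standalone sketch (not the SDK's actual parser):

```python
import codecs

text = "你好"                        # two 3-byte Chinese characters
data = text.encode("utf-8")          # 6 bytes total
chunk1, chunk2 = data[:4], data[4:]  # chunk boundary falls mid-character

# Strict per-chunk decoding fails on the trailing partial byte sequence.
naive_failed = False
try:
    chunk1.decode("utf-8")
except UnicodeDecodeError:
    naive_failed = True

# An incremental decoder buffers the partial bytes until the next chunk arrives.
decoder = codecs.getincrementaldecoder("utf-8")()
result = decoder.decode(chunk1) + decoder.decode(chunk2)
assert naive_failed and result == "你好"
```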

## 1.0.0b8 (2025-01-29)

### Features Added
97 changes: 49 additions & 48 deletions sdk/ai/azure-ai-inference/README.md
@@ -239,7 +239,8 @@ In the following sections you will find simple examples of:

* [Chat completions](#chat-completions-example)
* [Streaming chat completions](#streaming-chat-completions-example)
* [Chat completions with additional model-specific parameters](#chat-completions-with-additional-model-specific-parameters)
* [Adding model-specific parameters](#adding-model-specific-parameters)
* [Adding HTTP request headers](#adding-http-request-headers)
* [Text Embeddings](#text-embeddings-example)
* [Image Embeddings](#image-embeddings-example)

Expand Down Expand Up @@ -274,7 +275,7 @@ print(response.choices[0].message.content)

<!-- END SNIPPET -->

The following types of messages are supported: `SystemMessage`,`UserMessage`, `AssistantMessage`, `ToolMessage`. See also samples:
The following types of messages are supported: `SystemMessage`,`UserMessage`, `AssistantMessage`, `ToolMessage`, `DeveloperMessage`. See also samples:

* [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`.
* [sample_chat_completions_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py) for usage of `UserMessage` that
@@ -284,10 +285,9 @@ includes sending image data read from a local file.
* [sample_chat_completions_with_audio_data.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_data.py) for usage of `UserMessage` that includes sending audio data read from a local file.
* [sample_chat_completions_with_structured_output.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output.py) and [sample_chat_completions_with_structured_output_pydantic.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output_pydantic.py) for configuring the service to respond with a JSON-formatted string, adhering to your schema.

Alternatively, you can provide the full request body as a Python dictionary (`dict` object) instead of using the strongly typed classes like `SystemMessage` and `UserMessage`:

Alternatively, you can provide the messages as dictionaries instead of using the strongly typed classes like `SystemMessage` and `UserMessage`:

<!-- SNIPPET:sample_chat_completions_from_input_dict.chat_completions -->
<!-- SNIPPET:sample_chat_completions_from_input_dict.chat_completions_full_request_as_dict -->

```python
response = client.complete(
@@ -313,6 +313,27 @@ response = client.complete(

<!-- END SNIPPET -->

Or you can provide just the `messages` input argument as a list of Python `dict`:

<!-- SNIPPET:sample_chat_completions_from_input_dict.chat_completions_messages_as_dict -->

```python
response = client.complete(
messages=[
{
"role": "system",
"content": "You are an AI assistant that helps people find information.",
},
{
"role": "user",
"content": "How many feet are in a mile?",
},
]
)
```

<!-- END SNIPPET -->

To generate completions for additional messages, simply call `client.complete` multiple times using the same `client`.
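A sketch of how a multi-turn conversation can be carried across those repeated calls by accumulating the message `dict` objects. The `add_turn` helper and the canned assistant replies are illustrative only; in real use the assistant text would come from `response.choices[0].message.content`:

```python
# Conversation history as plain dicts, reused across successive complete calls.
history = [{"role": "system", "content": "You are a helpful assistant."}]

def add_turn(history, user_text, assistant_text):
    """Append one user/assistant exchange to the running history (illustrative)."""
    history.append({"role": "user", "content": user_text})
    # In real use, assistant_text would be response.choices[0].message.content.
    history.append({"role": "assistant", "content": assistant_text})

add_turn(history, "How many feet are in a mile?", "There are 5,280 feet in a mile.")
add_turn(history, "And how many yards?", "There are 1,760 yards in a mile.")

assert [m["role"] for m in history] == [
    "system", "user", "assistant", "user", "assistant"
]
```

Passing the full `history` on each call is what lets the model see earlier turns.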

### Streaming chat completions example
@@ -339,7 +360,10 @@ response = client.complete(
)

for update in response:
print(update.choices[0].delta.content or "", end="", flush=True)
if update.choices and update.choices[0].delta:
print(update.choices[0].delta.content or "", end="", flush=True)
if update.usage:
print(f"\n\nUsage: {update.usage}")

client.close()
```
@@ -350,9 +374,9 @@ In the above `for` loop that prints the results you should see the answer progre

To generate completions for additional messages, simply call `client.complete` multiple times using the same `client`.
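The guarded streaming loop above can be exercised without a live endpoint by feeding it canned updates. This stand-in (with illustrative field shapes) shows why the `choices`/`delta` checks matter — a final usage-bearing update may carry no choices at all:

```python
# Canned updates standing in for a streamed response (shapes are illustrative).
updates = [
    {"choices": [{"delta": {"content": "Hel"}}], "usage": None},
    {"choices": [{"delta": {"content": "lo"}}], "usage": None},
    {"choices": [], "usage": {"total_tokens": 7}},  # no delta, only usage
]

pieces, usage = [], None
for update in updates:
    # Guard both levels: the last update has an empty choices list.
    if update["choices"] and update["choices"][0]["delta"]:
        pieces.append(update["choices"][0]["delta"]["content"] or "")
    if update["usage"]:
        usage = update["usage"]

assert "".join(pieces) == "Hello"
assert usage["total_tokens"] == 7
```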

### Chat completions with additional model-specific parameters
### Adding model-specific parameters

In this example, extra JSON elements are inserted at the root of the request body by setting `model_extras` when calling the `complete` method. These are intended for AI models that require additional model-specific parameters beyond what is defined in the REST API [Request Body table](https://learn.microsoft.com/azure/ai-studio/reference/reference-model-inference-chat-completions#request-body).
In this example, extra JSON elements are inserted at the root of the request body by setting `model_extras` when calling the `complete` method of the `ChatCompletionsClient`. These are intended for AI models that require additional model-specific parameters beyond what is defined in the REST API [Request Body table](https://learn.microsoft.com/azure/ai-studio/reference/reference-model-inference-chat-completions#request-body).

<!-- SNIPPET:sample_chat_completions_with_model_extras.model_extras -->

@@ -383,6 +407,23 @@ In the above example, this will be the JSON payload in the HTTP request:

Note that by default the service will reject any request payload that includes extra parameters. To change this default service behaviour, when the `complete` method includes `model_extras` the client library automatically adds the HTTP request header `"extra-parameters": "pass-through"`.

Use the same method to add additional parameters in the requests of the other clients in this package.
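A rough sketch of what the client does with `model_extras` — merging the extra elements into the body root and opting out of strict validation via the header. The function and field names here are illustrative, not the SDK's internals:

```python
# Illustrative sketch of merging model_extras into a request (not SDK internals).
def build_request(messages, model_extras=None):
    body = {"messages": messages}
    headers = {"Content-Type": "application/json"}
    if model_extras:
        body.update(model_extras)  # extra JSON elements land at the body root
        headers["extra-parameters"] = "pass-through"  # relax service validation
    return body, headers

body, headers = build_request(
    [{"role": "user", "content": "How many feet are in a mile?"}],
    model_extras={"key1": "value1", "key2": False},
)
assert body["key1"] == "value1"
assert headers["extra-parameters"] == "pass-through"
```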

### Adding HTTP request headers

To add your own HTTP request headers, include a `headers` keyword argument in the client constructor, and pass a `dict` with your
header names and values. For example:

```python
client = ChatCompletionsClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(key),
    headers={"header1": "value1", "header2": "value2"},
)
```

And similarly for the other clients in this package.

### Text Embeddings example

This example demonstrates how to get text embeddings, for a Serverless API or Managed Compute endpoint, with key authentication, assuming `endpoint` and `key` are already defined. For Entra ID authentication, GitHub models endpoint or Azure OpenAI endpoint, modify the code to create the client as specified in the above sections.
@@ -450,46 +491,6 @@ data[0]: length=1024, [0.0103302, -0.04425049, ..., -0.011543274, -0.0009088516]

To generate image embeddings for additional images, simply call `client.embed` multiple times using the same `client`.
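When embedding images read from local files, the bytes are typically base64-encoded before being sent. A standalone sketch of that preparation step, using a throwaway file in place of a real image:

```python
import base64
import os
import tempfile

def encode_image(path: str) -> str:
    """Read an image file and return its contents as a base64 string."""
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

# A throwaway file with a PNG-like header stands in for a real image here.
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
    tmp.write(b"\x89PNG\r\n\x1a\nfakedata")
    path = tmp.name

encoded = encode_image(path)
assert base64.b64decode(encoded).startswith(b"\x89PNG")  # round-trips cleanly
os.unlink(path)
```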

<!--
### Image Embeddings example

This example demonstrates how to get image embeddings.

<! -- SNIPPET:sample_image_embeddings.image_embeddings -- >

```python
from azure.ai.inference import ImageEmbeddingsClient
from azure.ai.inference.models import ImageEmbeddingInput
from azure.core.credentials import AzureKeyCredential

with open("sample1.png", "rb") as f:
image1: str = base64.b64encode(f.read()).decode("utf-8")
with open("sample2.png", "rb") as f:
image2: str = base64.b64encode(f.read()).decode("utf-8")

client = ImageEmbeddingsClient(endpoint=endpoint, credential=AzureKeyCredential(key))

response = client.embed(input=[ImageEmbeddingInput(image=image1), ImageEmbeddingInput(image=image2)])

for item in response.data:
length = len(item.embedding)
print(
f"data[{item.index}]: length={length}, [{item.embedding[0]}, {item.embedding[1]}, "
f"..., {item.embedding[length-2]}, {item.embedding[length-1]}]"
)
```

-- END SNIPPET --

The printed result of course depends on the model, but you should see something like this:

```txt
TBD
```

To generate embeddings for additional phrases, simply call `client.embed` multiple times using the same `client`.
-->

## Troubleshooting

### Exceptions
2 changes: 1 addition & 1 deletion sdk/ai/azure-ai-inference/assets.json
@@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "python",
"TagPrefix": "python/ai/azure-ai-inference",
"Tag": "python/ai/azure-ai-inference_bc7c5bd581"
"Tag": "python/ai/azure-ai-inference_3f06cee8a7"
}
60 changes: 60 additions & 0 deletions sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py
@@ -373,15 +373,34 @@ def __ne__(self, other: typing.Any) -> bool:
return not self.__eq__(other)

def keys(self) -> typing.KeysView[str]:
"""
:returns: a set-like object providing a view on D's keys
:rtype: ~typing.KeysView
"""
return self._data.keys()

def values(self) -> typing.ValuesView[typing.Any]:
"""
:returns: an object providing a view on D's values
:rtype: ~typing.ValuesView
"""
return self._data.values()

def items(self) -> typing.ItemsView[str, typing.Any]:
"""
:returns: set-like object providing a view on D's items
:rtype: ~typing.ItemsView
"""
return self._data.items()

def get(self, key: str, default: typing.Any = None) -> typing.Any:
"""
Get the value for key if key is in the dictionary, else default.
:param str key: The key to look up.
:param any default: The value to return if key is not in the dictionary. Defaults to None
:returns: D[k] if k in D, else d.
:rtype: any
"""
try:
return self[key]
except KeyError:
@@ -397,17 +416,38 @@ def pop(self, key: str, default: _T) -> _T: ...
def pop(self, key: str, default: typing.Any) -> typing.Any: ...

def pop(self, key: str, default: typing.Any = _UNSET) -> typing.Any:
"""
Removes specified key and return the corresponding value.
:param str key: The key to pop.
:param any default: The value to return if key is not in the dictionary
:returns: The value corresponding to the key.
:rtype: any
:raises KeyError: If key is not found and default is not given.
"""
if default is _UNSET:
return self._data.pop(key)
return self._data.pop(key, default)

def popitem(self) -> typing.Tuple[str, typing.Any]:
"""
Removes and returns some (key, value) pair
:returns: The (key, value) pair.
:rtype: tuple
:raises KeyError: if D is empty.
"""
return self._data.popitem()

def clear(self) -> None:
"""
Remove all items from D.
"""
self._data.clear()

def update(self, *args: typing.Any, **kwargs: typing.Any) -> None:
"""
Updates D from mapping/iterable E and F.
:param any args: Either a mapping object or an iterable of key-value pairs.
"""
self._data.update(*args, **kwargs)

@typing.overload
@@ -417,6 +457,13 @@ def setdefault(self, key: str, default: None = None) -> None: ...
def setdefault(self, key: str, default: typing.Any) -> typing.Any: ...

def setdefault(self, key: str, default: typing.Any = _UNSET) -> typing.Any:
"""
Same as calling D.get(k, d), and setting D[k]=d if k not found
:param str key: The key to look up.
:param any default: The value to set if key is not in the dictionary
:returns: D[k] if k in D, else d.
:rtype: any
"""
if default is _UNSET:
return self._data.setdefault(key)
return self._data.setdefault(key, default)
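The `_UNSET` sentinel used above distinguishes "no default supplied" from "default is None", which a plain `default=None` signature cannot. A standalone sketch of the pattern:

```python
# Module-level sentinel: a unique object no caller could accidentally pass.
_UNSET = object()

class DictView:
    def __init__(self):
        self._data = {}

    def setdefault(self, key, default=_UNSET):
        if default is _UNSET:
            # No default given: behaves like dict.setdefault(k), inserting None.
            return self._data.setdefault(key)
        return self._data.setdefault(key, default)

d = DictView()
assert d.setdefault("a", 1) == 1   # key absent: stores and returns the default
assert d.setdefault("a", 2) == 1   # key present: existing value wins
assert d.setdefault("b") is None   # no default supplied: None is inserted
```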
@@ -910,6 +957,19 @@ def _failsafe_deserialize(
return None


def _failsafe_deserialize_xml(
deserializer: typing.Any,
value: typing.Any,
) -> typing.Any:
try:
return _deserialize_xml(deserializer, value)
except DeserializationError:
_LOGGER.warning(
"Ran into a deserialization error. Ignoring since this is failsafe deserialization", exc_info=True
)
return None
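The failsafe helpers above follow a simple pattern — log the error and return `None` rather than propagate. A standalone sketch, using `ValueError` as a stand-in for the SDK's `DeserializationError`:

```python
import logging

_LOGGER = logging.getLogger(__name__)

def failsafe(deserializer, value):
    """Apply deserializer to value; on failure, log and return None."""
    try:
        return deserializer(value)
    except ValueError:  # stand-in for DeserializationError
        _LOGGER.warning(
            "Ran into a deserialization error. Ignoring since this is failsafe",
            exc_info=True,
        )
        return None

assert failsafe(int, "42") == 42          # well-formed input deserializes
assert failsafe(int, "not a number") is None  # malformed input is swallowed
```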


class _RestField:
def __init__(
self,
@@ -181,15 +181,6 @@ def build_image_embeddings_get_model_info_request(**kwargs: Any) -> HttpRequest:

class ChatCompletionsClientOperationsMixin(ChatCompletionsClientMixinABC):

@overload
def _complete(
self,
body: JSON,
*,
extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
content_type: str = "application/json",
**kwargs: Any
) -> _models.ChatCompletions: ...
@overload
def _complete(
self,
@@ -214,6 +205,15 @@ def _complete(
**kwargs: Any
) -> _models.ChatCompletions: ...
@overload
def _complete(
self,
body: JSON,
*,
extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
content_type: str = "application/json",
**kwargs: Any
) -> _models.ChatCompletions: ...
@overload
def _complete(
self,
body: IO[bytes],
@@ -488,23 +488,23 @@ class EmbeddingsClientOperationsMixin(EmbeddingsClientMixinABC):
@overload
def _embed(
self,
body: JSON,
*,
input: List[str],
extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
content_type: str = "application/json",
dimensions: Optional[int] = None,
encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
model: Optional[str] = None,
**kwargs: Any
) -> _models.EmbeddingsResult: ...
@overload
def _embed(
self,
body: JSON,
*,
input: List[str],
extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
content_type: str = "application/json",
dimensions: Optional[int] = None,
encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
model: Optional[str] = None,
**kwargs: Any
) -> _models.EmbeddingsResult: ...
@overload
@@ -701,23 +701,23 @@ class ImageEmbeddingsClientOperationsMixin(ImageEmbeddingsClientMixinABC):
@overload
def _embed(
self,
body: JSON,
*,
input: List[_models.ImageEmbeddingInput],
extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
content_type: str = "application/json",
dimensions: Optional[int] = None,
encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
model: Optional[str] = None,
**kwargs: Any
) -> _models.EmbeddingsResult: ...
@overload
def _embed(
self,
body: JSON,
*,
input: List[_models.ImageEmbeddingInput],
extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
content_type: str = "application/json",
dimensions: Optional[int] = None,
encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
model: Optional[str] = None,
**kwargs: Any
) -> _models.EmbeddingsResult: ...
@overload