add tolerance for empty choices #4311

Open · wants to merge 12 commits into main
```diff
@@ -556,6 +556,7 @@ async def create_stream(
         json_output: Optional[bool] = None,
         extra_create_args: Mapping[str, Any] = {},
         cancellation_token: Optional[CancellationToken] = None,
+        max_consecutive_empty_chunk_tolerance: int = 0,
     ) -> AsyncGenerator[Union[str, CreateResult], None]:
         """
         Creates an AsyncGenerator that will yield a stream of chat completions based on the provided messages and tools.
@@ -566,6 +567,7 @@ async def create_stream(
             json_output (Optional[bool], optional): If True, the output will be in JSON format. Defaults to None.
             extra_create_args (Mapping[str, Any], optional): Additional arguments for the creation process. Defaults to `{}`.
             cancellation_token (Optional[CancellationToken], optional): A token to cancel the operation. Defaults to None.
+            max_consecutive_empty_chunk_tolerance (int): The maximum number of consecutive empty chunks to tolerate before raising a ValueError. This appears to be needed only when using `AzureOpenAIChatCompletionClient`. Defaults to 0.
 
         Yields:
             AsyncGenerator[Union[str, CreateResult], None]: A generator yielding the completion results as they are produced.
```
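
For illustration, a minimal sketch of how a caller might opt into the new tolerance. The import paths and client constructor arguments are assumptions based on the autogen 0.4 preview layout at the time of this PR, not part of the diff:

```python
# Hypothetical usage sketch -- import paths and constructor arguments are
# assumptions (autogen 0.4 preview era), not taken from this PR.
import asyncio

from autogen_core.components.models import UserMessage
from autogen_ext.models import AzureOpenAIChatCompletionClient


async def main() -> None:
    client = AzureOpenAIChatCompletionClient(
        model="gpt-4o",
        api_version="2024-06-01",
        azure_endpoint="https://<your-resource>.openai.azure.com",  # placeholder
        api_key="<your-key>",  # placeholder
    )
    # Tolerate up to 5 consecutive empty chunks (see issue #4213) instead of
    # failing on the first one.
    async for item in client.create_stream(
        messages=[UserMessage(content="Hello!", source="user")],
        max_consecutive_empty_chunk_tolerance=5,
    ):
        print(item)


asyncio.run(main())
```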
```diff
@@ -636,13 +638,29 @@ async def create_stream(
         full_tool_calls: Dict[int, FunctionCall] = {}
         completion_tokens = 0
         logprobs: Optional[List[ChatCompletionTokenLogprob]] = None
+        empty_chunk_count = 0
 
         while True:
             try:
                 chunk_future = asyncio.ensure_future(anext(stream))
                 if cancellation_token is not None:
                     cancellation_token.link_future(chunk_future)
                 chunk = await chunk_future
+
+                # This is to address a bug in AzureOpenAIChatCompletionClient. OpenAIChatCompletionClient works fine.
+                # https://github.com/microsoft/autogen/issues/4213
+                if len(chunk.choices) == 0:
```
Review thread on `if len(chunk.choices) == 0:`:

Member: Do the chunks with no choices have other data set on other fields of the delta?

@MohMaz (Contributor, Author) · Nov 23, 2024: I can't see anything in `chunk.__dict__`. In the consecutive chunks, the choices get populated properly and have all the expected fields.

`{'id': '', 'choices': [], 'created': 0, 'model': '', 'object': '', 'service_tier': None, 'system_fingerprint': None, 'usage': None}`

This has been happening only for the first 1-2 chunks for me; I'm not sure whether it's reproducible across all models. I've tried gpt-4o.
```diff
+                    empty_chunk_count += 1
+                    if max_consecutive_empty_chunk_tolerance == 0:
+                        raise ValueError(
+                            "Consecutive empty chunks found. Change max_consecutive_empty_chunk_tolerance to increase empty chunk tolerance"
+                        )
+                    elif empty_chunk_count >= max_consecutive_empty_chunk_tolerance:
+                        raise ValueError("Exceeded the threshold of receiving consecutive empty chunks")
+                    continue
+                else:
+                    empty_chunk_count = 0
+
                 # to process usage chunk in streaming situations
                 # add stream_options={"include_usage": True} in the initialization of OpenAIChatCompletionClient(...)
                 # However the different api's
```
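
The trailing comment suggests enabling `stream_options={"include_usage": True}` when constructing the client so the stream ends with a usage chunk. A hedged sketch of that, per the comment above; the model and key are placeholders, and whether the constructor forwards this option is taken from the comment rather than verified against the library:

```python
# Sketch of enabling usage reporting on streamed responses, per the comment
# in the diff above; model and api_key are placeholders.
from autogen_ext.models import OpenAIChatCompletionClient

client = OpenAIChatCompletionClient(
    model="gpt-4o",
    api_key="<your-key>",  # placeholder
    stream_options={"include_usage": True},  # ask the API for a final usage chunk
)
```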
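
To see the control flow this PR adds in isolation, here is a self-contained restatement of the tolerance pattern using plain dicts as illustrative stand-ins for `ChatCompletionChunk` objects. The `>` comparison folds the PR's zero-tolerance and threshold branches into one check with the same behavior:

```python
# Self-contained restatement of the empty-chunk tolerance pattern; the chunk
# dicts below are hypothetical stand-ins for ChatCompletionChunk objects.
from typing import Any, Dict, Iterable


def consume(chunks: Iterable[Dict[str, Any]], max_consecutive_empty_chunk_tolerance: int = 0) -> None:
    empty_chunk_count = 0
    for chunk in chunks:
        if len(chunk["choices"]) == 0:
            empty_chunk_count += 1
            # With tolerance 0 this raises on the first empty chunk, matching
            # the PR's dedicated zero-tolerance branch.
            if empty_chunk_count > max_consecutive_empty_chunk_tolerance:
                raise ValueError("Exceeded the threshold of receiving consecutive empty chunks")
            continue
        empty_chunk_count = 0  # reset once a non-empty chunk arrives
        print(chunk["choices"][0])


# Azure has been observed to send 1-2 empty chunks first (see the thread above).
stream = [
    {"choices": []},
    {"choices": []},
    {"choices": [{"delta": {"content": "Hi"}}]},
]
consume(stream, max_consecutive_empty_chunk_tolerance=2)
```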