Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes “The model produced invalid content” error when calling functions #3429

Draft
wants to merge 10 commits into
base: 0.2
Choose a base branch
from
1 change: 1 addition & 0 deletions autogen/function_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class Parameters(BaseModel):
type: Literal["object"] = "object"
properties: Dict[str, JsonSchemaValue]
required: List[str]
additionalProperties: bool = False


class Function(BaseModel):
Expand Down
13 changes: 13 additions & 0 deletions autogen/oai/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,16 @@ def message_retrieval(
for choice in choices
]

@staticmethod
def _remove_assistant_names_from_messages(params: Dict[str, Any]):
def _remove_assistant_names(messages: List[Dict[str, Any]]):
def _remove_name(message: Dict[str, Any]):
return {k: v for k, v in message.items() if k != "name"}

return [_remove_name(m) if m["role"] == "assistant" else m for m in messages]

return {k: _remove_assistant_names(v) if k == "messages" else v for k, v in params.items()}

def create(self, params: Dict[str, Any]) -> ChatCompletion:
"""Create a completion for a given config using openai's client.

Expand All @@ -207,6 +217,9 @@ def create(self, params: Dict[str, Any]) -> ChatCompletion:
"""
iostream = IOStream.get_default()

# name parameter in an assistant message causes sometimes an error in the API, although they are legit parameter according to the API reference (https://platform.openai.com/docs/api-reference/chat/create)
params = self._remove_assistant_names_from_messages(params)

completions: Completions = self._oai_client.chat.completions if "messages" in params else self._oai_client.completions # type: ignore [attr-defined]
# If streaming is enabled and has messages, then iterate over the chunks of the response.
if params.get("stream", False) and "messages" in params:
Expand Down
5 changes: 5 additions & 0 deletions test/agentchat/test_conversable_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,7 @@ def exec_python(cell: Annotated[str, "Valid Python cell to execute."]) -> str:
}
},
"required": ["cell"],
"additionalProperties": False,
},
},
}
Expand Down Expand Up @@ -722,6 +723,7 @@ async def exec_sh(script: Annotated[str, "Valid shell script to execute."]) -> s
}
},
"required": ["script"],
"additionalProperties": False,
},
},
}
Expand Down Expand Up @@ -762,6 +764,7 @@ def exec_python(cell: Annotated[str, "Valid Python cell to execute."]) -> str:
}
},
"required": ["cell"],
"additionalProperties": False,
},
}
]
Expand Down Expand Up @@ -794,6 +797,7 @@ async def exec_sh(script: Annotated[str, "Valid shell script to execute."]) -> s
}
},
"required": ["script"],
"additionalProperties": False,
},
}
]
Expand Down Expand Up @@ -916,6 +920,7 @@ def exec_python(cell: Annotated[str, "Valid Python cell to execute."]) -> str:
}
},
"required": ["cell"],
"additionalProperties": False,
},
},
}
Expand Down
250 changes: 249 additions & 1 deletion test/oai/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
import shutil
import sys
import time
from tempfile import TemporaryDirectory

import pytest

from autogen import OpenAIWrapper, config_list_from_json
from autogen.cache.cache import Cache
from autogen.oai.client import LEGACY_CACHE_DIR, LEGACY_DEFAULT_CACHE_SEED
from autogen.oai.client import LEGACY_CACHE_DIR, LEGACY_DEFAULT_CACHE_SEED, OpenAIClient

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from conftest import skip_openai # noqa: E402
Expand Down Expand Up @@ -322,6 +323,253 @@ def test_cache():
assert not os.path.exists(os.path.join(cache_dir, str(LEGACY_DEFAULT_CACHE_SEED)))


def test__remove_assistant_names_from_messages():
params = {
"messages": [
{
"content": "system message",
"role": "system",
},
{"content": "Initial message", "name": "Teacher_Agent", "role": "user"},
{
"content": None,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": '{"message":"function parameter"}',
"name": "retrieve_exam_questions",
},
"id": "call_WtPX2rq3saIIqk4hM4jjAhNY",
"type": "function",
}
],
},
{
"content": "function reply",
"role": "tool",
"tool_call_id": "call_WtPX2rq3saIIqk4hM4jjAhNY",
},
{
"content": "some message",
"role": "assistant",
"name": "Student_Agent", # this is problematic for some reason and we'll remove it
},
{
"content": "some another message",
"name": "Teacher_Agent",
"role": "user",
},
],
"model": "gpt-4o-mini",
"stream": False,
"temperature": 0.8,
"tools": [
{
"function": {
"description": "Get exam questions from examiner",
"name": "retrieve_exam_questions",
"parameters": {
"additionalProperties": False,
"properties": {"message": {"description": "Message for examiner", "type": "string"}},
"required": ["message"],
"type": "object",
},
},
"type": "function",
},
],
}
actual = OpenAIClient._remove_assistant_names_from_messages(params)

assert [m for m in actual["messages"] if m.get("role") != "assistant"] == [
m for m in params["messages"] if m.get("role") != "assistant"
]
assert [m for m in actual["messages"] if m.get("role") == "assistant" and m.get("content") is not None] == [
{"content": "some message", "role": "assistant"}
]

actual.pop("messages")
params.pop("messages")
assert actual == params


@pytest.mark.skipif(skip, reason="openai>=1 not installed")
def test_chat_completion_after_tool_call():
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marklysze here is an example of failing completion call

Copy link
Collaborator

@marklysze marklysze Aug 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @davorrunje, is it possible to get the AutoGen code that would have generated this?

As an update: I ran the params through OpenAI's API (response = completions.create(**params)) and it ran through okay and returned a function call.

With my agent's termination check it failed on checking the content for the termination keyword as content is None:
is_termination_msg=lambda x: True if "FINISH" in x["content"] else False

The exception: argument of type 'NoneType' is not iterable

Updating my termination expression corrected that:
is_termination_msg=lambda x: True if x["content"] and "FINISH" in x["content"] else False

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marklysze I marked the PR as a draft as I am not sure yet what kind of workaround is the best one. The issue seems to be on the OpenAPI side and it is more common with GPT-4o-mini than older models. Maybe we should remove names only if we get an exception? Even changing the system message a little bit helps sometimes. The list of possible workarounds is (https://community.openai.com/t/bizarre-issue-preventing-response-from-gpt-4o-mini-the-model-produces-invalid-content/875432):

  • Remove tools and tool_choice args.
  • Slightly reduce the length of the prompt, even by a couple of words.
  • Add at least a second user message or an assistant message.
  • Change the name or remove the name parameter (but that didn’t always work, depending on the prompt). Some names work, others don’t. ‘Gregory’ worked, for example.
  • Change the message. Some work and some don’t. ‘Hello.’ didn’t work, but ‘Hello, friend.’ worked.
  • Change to gpt-4o or gpt-3.5-turbo.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @davorrunje, appreciate the detail and the note on the draft.

It is definitely tricky because removing the name is a considerable change and could affect people's existing code - at the very least I'd suggest it is made as an option and defaults to existing behaviour of leaving it in.

From the list you provided, I think the third point would be the safest to do to minimise the impact on the validity of the messages. In the following linked LinkedIn post they added a message "DO NOT PRODUCE INVALID CONTENT" and that seemed to fix it! :).

LinkedIn Post on this being intermittent behaviour.

I would love to be able to replicate it, have you got any other examples that you can get to throw an exception?

I wonder if rather than change the messages to start with, we run inference and if it throws that specific exception then it adjusts the messages (perhaps with 3rd option above) and tries again up to x times.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think we need to add a simple hack first and see how it goes. I see this error quite often when working with function calls and GPT-4o and GPT-4o-mini. I'll try adding a message as suggested first.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, not sure if it's possible but if you do get the exception and are able to get the params["messages"] it would be interesting to see if it is replicated.

See how you go with the additional message.

config_list = config_list_from_json(
env_or_file=OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={"tags": ["gpt-4o-mini"]},
)
with TemporaryDirectory() as temp_dir:
with Cache.disk(cache_seed=42, cache_path_root=temp_dir) as cache:
client = OpenAIWrapper(config_list=config_list, cache=cache)
params = {
"messages": [
{
"content": "You are a student writing a practice test. Your "
"task is as follows:\n"
" 1) Retrieve exam questions by calling a "
"function.\n"
" 2) Write a draft of proposed answers and engage "
"in dialogue with your tutor.\n"
" 3) Once you are done with the dialogue, register "
"the final answers by calling a function.\n"
" 4) Retrieve the final grade by calling a "
"function.\n"
"Finally, terminate the chat by saying 'TERMINATE'.",
"role": "system",
},
{
"content": "Prepare for the test about Leonardo da Vinci.",
"name": "Teacher_Agent",
"role": "user",
},
{
"content": None,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": '{"message":"Please '
"provide the exam "
"questions for the "
"test about Leonardo "
'da Vinci."}',
"name": "retrieve_exam_questions",
},
"id": "call_WtPX2rq3saIIqk4hM4jjAhNY",
"type": "function",
}
],
},
{
"content": "1) Mona Lisa 2) Innovations 3) Florence at the time "
"of Leonardo 4) The Last Supper 5) Vit",
"role": "tool",
"tool_call_id": "call_WtPX2rq3saIIqk4hM4jjAhNY",
},
{
"content": "I have retrieved the exam questions about Leonardo "
"da Vinci. Here they are:\n"
"\n"
"1. Mona Lisa\n"
"2. Innovations\n"
"3. Florence at the time of Leonardo\n"
"4. The Last Supper\n"
"5. Vitruvian Man\n"
"\n"
"Now, I'll draft proposed answers for each question. "
"Let's start with the first one: \n"
"\n"
"1. **Mona Lisa**: The Mona Lisa, painted by "
"Leonardo da Vinci in the early 16th century, is "
"arguably the most famous painting in the world. It "
"is known for the subject's enigmatic expression and "
"da Vinci's masterful use of sfumato, which creates "
"a soft transition between colors and tones.\n"
"\n"
# "What do you think of this draft answer? Would you "
"like to add or change anything?",
"role": "assistant",
"name": "Student_Agent", # this is problematic for some reason
},
{
"content": "Your draft answer for the Mona Lisa is "
"well-articulated and captures the essence of the "
"painting and its significance. Here are a few "
"suggestions to enhance your response:\n"
"\n"
"1. **Historical Context**: You could mention that "
"the painting was created between 1503 and 1506 and "
"is housed in the Louvre Museum in Paris.\n"
" \n"
"2. **Artistic Techniques**: You might want to "
"elaborate on the technique of sfumato, explaining "
"how it contributes to the depth and realism of the "
"portrait.\n"
"\n"
"3. **Cultural Impact**: It could be beneficial to "
"touch on the painting's influence on art and "
"culture, as well as its status as a symbol of the "
"Renaissance.\n"
"\n"
"Here's a revised version incorporating these "
"points:\n"
"\n"
"**Revised Answer**: The Mona Lisa, painted by "
"Leonardo da Vinci between 1503 and 1506, is "
"arguably the most famous painting in the world and "
"is housed in the Louvre Museum in Paris. It is "
"renowned for the subject's enigmatic expression, "
"which has intrigued viewers for centuries. Da "
"Vinci's masterful use of sfumato, a technique that "
"allows for a soft transition between colors and "
"tones, adds a remarkable depth and realism to the "
"piece. The Mona Lisa has not only influenced "
"countless artists and movements but has also become "
"a symbol of the Renaissance and a cultural icon in "
"its own right.\n"
"\n"
"Let me know if you would like to move on to the "
"next question or make further adjustments!",
"name": "Teacher_Agent",
"role": "user",
},
],
"model": "gpt-4o-mini",
"stream": False,
"temperature": 0.8,
"tools": [
{
"function": {
"description": "Get exam questions from examiner",
"name": "retrieve_exam_questions",
"parameters": {
"additionalProperties": False,
"properties": {"message": {"description": "Message for examiner", "type": "string"}},
"required": ["message"],
"type": "object",
},
},
"type": "function",
},
{
"function": {
"description": "Write a final answers to exam "
"questions to examiner, but only after "
"discussing with the tutor first.",
"name": "write_final_answers",
"parameters": {
"additionalProperties": False,
"properties": {"message": {"description": "Message for examiner", "type": "string"}},
"required": ["message"],
"type": "object",
},
},
"type": "function",
},
{
"function": {
"description": "Get the final grade after submitting " "the answers.",
"name": "get_final_grade",
"parameters": {
"additionalProperties": False,
"properties": {"message": {"description": "Message for examiner", "type": "string"}},
"required": ["message"],
"type": "object",
},
},
"type": "function",
},
],
}

response = client.create(**params)

assert response is not None


if __name__ == "__main__":
# test_aoai_chat_completion()
# test_oai_tool_calling_extraction()
Expand Down
4 changes: 4 additions & 0 deletions test/test_function_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def f(a: Annotated[str, "Parameter a"], b=1, c: Annotated[float, "Parameter c"]
"c": {"type": "number", "description": "Parameter c", "default": 1.0},
},
"required": ["a"],
"additionalProperties": False,
}

actual = model_dump(get_parameters(required, param_annotations, default_values))
Expand Down Expand Up @@ -236,6 +237,7 @@ def test_get_function_schema() -> None:
},
},
"required": ["a", "d"],
"additionalProperties": False,
},
},
}
Expand Down Expand Up @@ -264,6 +266,7 @@ def test_get_function_schema() -> None:
},
},
"required": ["a", "d"],
"additionalProperties": False,
},
},
}
Expand Down Expand Up @@ -333,6 +336,7 @@ def currency_calculator( # type: ignore[empty-body]
},
},
"required": ["base"],
"additionalProperties": False,
},
},
}
Expand Down
Loading