Skip to content

Commit

Permalink
fix: patch bug in inner thoughts unpacker (#2311)
Browse files Browse the repository at this point in the history
  • Loading branch information
cpacker authored Dec 23, 2024
1 parent ea2a739 commit c8f5dc3
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 80 deletions.
6 changes: 3 additions & 3 deletions letta/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,8 @@ def execute_tool_and_persist_state(self, function_name: str, function_args: dict
)
function_response, updated_agent_state = sandbox_run_result.func_return, sandbox_run_result.agent_state
assert orig_memory_str == self.agent_state.memory.compile(), "Memory should not be modified in a sandbox tool"

self.update_memory_if_change(updated_agent_state.memory)
if updated_agent_state is not None:
self.update_memory_if_change(updated_agent_state.memory)
except Exception as e:
# Need to catch the error here, or else truncation won't happen
# TODO: modify to function execution error
Expand All @@ -251,7 +251,7 @@ def execute_tool_and_persist_state(self, function_name: str, function_args: dict
def _get_ai_reply(
self,
message_sequence: List[Message],
function_call: str = "auto",
function_call: Optional[str] = None,
first_message: bool = False,
stream: bool = False, # TODO move to config?
empty_response_retry_limit: int = 3,
Expand Down
10 changes: 8 additions & 2 deletions letta/llm_api/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ def unpack_all_inner_thoughts_from_kwargs(

def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
message = choice.message
rewritten_choice = choice # inner thoughts unpacked out of the function

if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
if len(message.tool_calls) > 1:
warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
Expand All @@ -271,14 +273,18 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -
warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
new_choice.message.content = inner_thoughts

return new_choice
# update the choice object
rewritten_choice = new_choice
else:
warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
return choice

except json.JSONDecodeError as e:
warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
raise e
else:
warnings.warn(f"Did not find tool call in message: {str(message)}")

return rewritten_choice


def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool:
Expand Down
18 changes: 11 additions & 7 deletions letta/llm_api/llm_api_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def create(
user_id: Optional[str] = None, # option UUID to associate request with
functions: Optional[list] = None,
functions_python: Optional[dict] = None,
function_call: str = "auto",
function_call: Optional[str] = None, # see: https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
# hint
first_message: bool = False,
force_tool_call: Optional[str] = None, # Force a specific tool to be called
Expand Down Expand Up @@ -148,10 +148,19 @@ def create(

# openai
if llm_config.model_endpoint_type == "openai":

if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
# this is only a problem if we are *not* using an openai proxy
raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])

if function_call is None and functions is not None and len(functions) > 0:
# force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
# TODO(matt) move into LLMConfig
if llm_config.model_endpoint == "https://inference.memgpt.ai":
function_call = "auto" # TODO change to "required" once proxy supports it
else:
function_call = "required"

data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens)
if stream: # Client requested token streaming
data.stream = True
Expand Down Expand Up @@ -255,12 +264,7 @@ def create(

tool_call = None
if force_tool_call is not None:
tool_call = {
"type": "function",
"function": {
"name": force_tool_call
}
}
tool_call = {"type": "function", "function": {"name": force_tool_call}}
assert functions is not None

return anthropic_chat_completions_request(
Expand Down
2 changes: 1 addition & 1 deletion letta/services/agent_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def rebuild_system_prompt(self, agent_id: str, actor: PydanticUser, force=False,
curr_memory_str = agent_state.memory.compile()
if curr_memory_str in curr_system_message_openai["content"] and not force:
# NOTE: could this cause issues if a block is removed? (substring match would still work)
logger.info(
logger.debug(
f"Memory hasn't changed for agent id={agent_id} and actor=({actor.id}, {actor.name}), skipping system prompt rebuild"
)
return agent_state
Expand Down
6 changes: 5 additions & 1 deletion tests/helpers/endpoints_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ def check_first_response_is_valid_for_llm_endpoint(filename: str) -> ChatComplet
choice = response.choices[0]

# Ensure that the first message calls one of the allowed functions (e.g. "send_message")
validator_func = lambda function_call: function_call.name == "send_message" or function_call.name == "archival_memory_search"
validator_func = (
lambda function_call: function_call.name == "send_message"
or function_call.name == "archival_memory_search"
or function_call.name == "core_memory_append"
)
assert_contains_valid_function_call(choice.message, validator_func)

# Assert that the message has an inner monologue
Expand Down
97 changes: 31 additions & 66 deletions tests/integration_test_agent_tool_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import uuid

import pytest

from letta import create_client
from letta.schemas.letta_message import ToolCallMessage
from letta.schemas.tool_rule import (
Expand Down Expand Up @@ -42,7 +43,7 @@ def second_secret_word(prev_secret_word: str):
prev_secret_word (str): The secret word retrieved from calling first_secret_word.
"""
if prev_secret_word != "v0iq020i0g":
raise RuntimeError(f"Expected secret {"v0iq020i0g"}, got {prev_secret_word}")
raise RuntimeError(f"Expected secret {'v0iq020i0g'}, got {prev_secret_word}")

return "4rwp2b4gxq"

Expand All @@ -55,7 +56,7 @@ def third_secret_word(prev_secret_word: str):
prev_secret_word (str): The secret word retrieved from calling second_secret_word.
"""
if prev_secret_word != "4rwp2b4gxq":
raise RuntimeError(f"Expected secret {"4rwp2b4gxq"}, got {prev_secret_word}")
raise RuntimeError(f'Expected secret "4rwp2b4gxq", got {prev_secret_word}')

return "hj2hwibbqm"

Expand All @@ -68,7 +69,7 @@ def fourth_secret_word(prev_secret_word: str):
prev_secret_word (str): The secret word retrieved from calling third_secret_word.
"""
if prev_secret_word != "hj2hwibbqm":
raise RuntimeError(f"Expected secret {"hj2hwibbqm"}, got {prev_secret_word}")
raise RuntimeError(f"Expected secret {'hj2hwibbqm'}, got {prev_secret_word}")

return "banana"

Expand Down Expand Up @@ -194,16 +195,13 @@ def test_check_tool_rules_with_different_models(mock_e2b_api_key_none):
"tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json",
"tests/configs/llm_model_configs/openai-gpt-4o.json",
]

# Create two test tools
t1_name = "first_secret_word"
t2_name = "second_secret_word"
t1 = client.create_or_update_tool(first_secret_word, name=t1_name)
t2 = client.create_or_update_tool(second_secret_word, name=t2_name)
tool_rules = [
InitToolRule(tool_name=t1_name),
InitToolRule(tool_name=t2_name)
]
tool_rules = [InitToolRule(tool_name=t1_name), InitToolRule(tool_name=t2_name)]
tools = [t1, t2]

for config_file in config_files:
Expand All @@ -212,34 +210,26 @@ def test_check_tool_rules_with_different_models(mock_e2b_api_key_none):

if "gpt-4o" in config_file:
# Structured output model (should work with multiple init tools)
agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid,
tool_ids=[t.id for t in tools],
tool_rules=tool_rules)
agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
assert agent_state is not None
else:
# Non-structured output model (should raise error with multiple init tools)
with pytest.raises(ValueError, match="Multiple initial tools are not supported for non-structured models"):
setup_agent(client, config_file, agent_uuid=agent_uuid,
tool_ids=[t.id for t in tools],
tool_rules=tool_rules)

setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

# Cleanup
cleanup(client=client, agent_uuid=agent_uuid)

# Create tool rule with single initial tool
t3_name = "third_secret_word"
t3 = client.create_or_update_tool(third_secret_word, name=t3_name)
tool_rules = [
InitToolRule(tool_name=t3_name)
]
tool_rules = [InitToolRule(tool_name=t3_name)]
tools = [t3]
for config_file in config_files:
agent_uuid = str(uuid.uuid4())

# Structured output model (should work with single init tool)
agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid,
tool_ids=[t.id for t in tools],
tool_rules=tool_rules)
agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
assert agent_state is not None

cleanup(client=client, agent_uuid=agent_uuid)
Expand All @@ -257,15 +247,17 @@ def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none):
tool_rules = [
InitToolRule(tool_name=t1_name),
ChildToolRule(tool_name=t1_name, children=[t2_name]),
TerminalToolRule(tool_name=t2_name)
TerminalToolRule(tool_name=t2_name),
]
tools = [t1, t2]

# Make agent state
anthropic_config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json"
for i in range(3):
agent_uuid = str(uuid.uuid4())
agent_state = setup_agent(client, anthropic_config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
agent_state = setup_agent(
client, anthropic_config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules
)
response = client.user_message(agent_id=agent_state.id, message="What is the second secret word?")

assert_sanity_checks(response)
Expand All @@ -289,9 +281,10 @@ def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none):

# Implement exponential backoff with initial time of 10 seconds
if i < 2:
backoff_time = 10 * (2 ** i)
backoff_time = 10 * (2**i)
time.sleep(backoff_time)


@pytest.mark.timeout(60) # Sets a 60-second timeout for the test since this could loop infinitely
def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none):
client = create_client()
Expand Down Expand Up @@ -389,7 +382,7 @@ def test_agent_conditional_tool_easy(mock_e2b_api_key_none):
default_child=coin_flip_name,
child_output_mapping={
"hj2hwibbqm": secret_word_tool,
}
},
),
TerminalToolRule(tool_name=secret_word_tool),
]
Expand Down Expand Up @@ -425,7 +418,6 @@ def test_agent_conditional_tool_easy(mock_e2b_api_key_none):
cleanup(client=client, agent_uuid=agent_uuid)



@pytest.mark.timeout(90) # Longer timeout since this test has more steps
def test_agent_conditional_tool_hard(mock_e2b_api_key_none):
"""
Expand All @@ -450,38 +442,26 @@ def test_agent_conditional_tool_hard(mock_e2b_api_key_none):
final_tool = "fourth_secret_word"
play_game_tool = client.create_or_update_tool(can_play_game, name=play_game)
flip_coin_tool = client.create_or_update_tool(flip_coin_hard, name=coin_flip_name)
reveal_secret = client.create_or_update_tool(fourth_secret_word, name=final_tool)
reveal_secret = client.create_or_update_tool(fourth_secret_word, name=final_tool)

# Make tool rules - chain them together with conditional rules
tool_rules = [
InitToolRule(tool_name=play_game),
ConditionalToolRule(
tool_name=play_game,
default_child=play_game, # Keep trying if we can't play
child_output_mapping={
True: coin_flip_name # Only allow access when can_play_game returns True
}
child_output_mapping={True: coin_flip_name}, # Only allow access when can_play_game returns True
),
ConditionalToolRule(
tool_name=coin_flip_name,
default_child=coin_flip_name,
child_output_mapping={
"hj2hwibbqm": final_tool, "START_OVER": play_game
}
tool_name=coin_flip_name, default_child=coin_flip_name, child_output_mapping={"hj2hwibbqm": final_tool, "START_OVER": play_game}
),
TerminalToolRule(tool_name=final_tool),
]

# Setup agent with all tools
tools = [play_game_tool, flip_coin_tool, reveal_secret]
config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json"
agent_state = setup_agent(
client,
config_file,
agent_uuid=agent_uuid,
tool_ids=[t.id for t in tools],
tool_rules=tool_rules
)
agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

# Ask agent to try to get all secret words
response = client.user_message(agent_id=agent_state.id, message="hi")
Expand Down Expand Up @@ -520,7 +500,7 @@ def test_agent_conditional_tool_without_default_child(mock_e2b_api_key_none):
Test the agent with a conditional tool that allows any child tool to be called if a function returns None.
Tool Flow:
return_none
|
v
Expand All @@ -541,27 +521,16 @@ def test_agent_conditional_tool_without_default_child(mock_e2b_api_key_none):
ConditionalToolRule(
tool_name=tool_name,
default_child=None, # Allow any tool to be called if output doesn't match
child_output_mapping={
"anything but none": "first_secret_word"
}
)
child_output_mapping={"anything but none": "first_secret_word"},
),
]
tools = [tool, secret_word]

# Setup agent with all tools
agent_state = setup_agent(
client,
config_file,
agent_uuid=agent_uuid,
tool_ids=[t.id for t in tools],
tool_rules=tool_rules
)
agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

# Ask agent to try different tools based on the game output
response = client.user_message(
agent_id=agent_state.id,
message="call a function, any function. then call send_message"
)
response = client.user_message(agent_id=agent_state.id, message="call a function, any function. then call send_message")

# Make checks
assert_sanity_checks(response)
Expand Down Expand Up @@ -613,18 +582,14 @@ def test_agent_reload_remembers_function_response(mock_e2b_api_key_none):
ConditionalToolRule(
tool_name=flip_coin_name,
default_child=flip_coin_name, # Allow any tool to be called if output doesn't match
child_output_mapping={
"hj2hwibbqm": secret_word
}
child_output_mapping={"hj2hwibbqm": secret_word},
),
TerminalToolRule(tool_name=secret_word)
TerminalToolRule(tool_name=secret_word),
]
tools = [flip_coin_tool, secret_word_tool]

# Setup initial agent
agent_state = setup_agent(
client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules
)
agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

# Call flip_coin first
response = client.user_message(agent_id=agent_state.id, message="flip a coin")
Expand All @@ -643,4 +608,4 @@ def test_agent_reload_remembers_function_response(mock_e2b_api_key_none):
assert reloaded_agent.last_function_response is not None

print(f"Got successful response from client: \n\n{response}")
cleanup(client=client, agent_uuid=agent_uuid)
cleanup(client=client, agent_uuid=agent_uuid)

0 comments on commit c8f5dc3

Please sign in to comment.