Skip to content

Commit

Permalink
Add Structure Chat Agent (langchain-ai#3912)
Browse files Browse the repository at this point in the history
Create a new chat agent that is compatible with the Multi-input tools
  • Loading branch information
vowelparrot authored May 2, 2023
1 parent ec21b71 commit c582f2e
Show file tree
Hide file tree
Showing 10 changed files with 705 additions and 14 deletions.
312 changes: 312 additions & 0 deletions docs/modules/agents/agents/examples/structured_chat.ipynb

Large diffs are not rendered by default.

153 changes: 139 additions & 14 deletions docs/modules/agents/toolkits/examples/playwright.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# !pip install playwright > /dev/null\n",
Expand Down Expand Up @@ -49,7 +51,9 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# This import is required only for jupyter notebooks, since they have their own eventloop\n",
Expand All @@ -69,18 +73,20 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[ClickTool(sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>, name='click_element', description='Click on an element with the given CSS selector', args_schema=<class 'langchain.tools.playwright.click.ClickToolInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x10e104290>),\n",
" NavigateTool(sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>, name='navigate_browser', description='Navigate a browser to the specified URL', args_schema=<class 'langchain.tools.playwright.navigate.NavigateToolInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x10e104290>),\n",
" NavigateBackTool(sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>, name='previous_webpage', description='Navigate back to the previous page in the browser history', args_schema=<class 'pydantic.main.BaseModel'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x10e104290>),\n",
" ExtractTextTool(sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>, name='extract_text', description='Extract all the text on the current webpage', args_schema=<class 'pydantic.main.BaseModel'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x10e104290>),\n",
" ExtractHyperlinksTool(sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>, name='extract_hyperlinks', description='Extract all hyperlinks on the current webpage', args_schema=<class 'langchain.tools.playwright.extract_hyperlinks.ExtractHyperlinksToolInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x10e104290>),\n",
" GetElementsTool(sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>, name='get_elements', description='Retrieve elements in the current web page matching the given CSS selector', args_schema=<class 'langchain.tools.playwright.get_elements.GetElementsToolInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x10e104290>),\n",
" CurrentWebPageTool(sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>, name='current_webpage', description='Returns the URL of the current page', args_schema=<class 'pydantic.main.BaseModel'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x10e104290>)]"
"[ClickTool(name='click_element', description='Click on an element with the given CSS selector', args_schema=<class 'langchain.tools.playwright.click.ClickToolInput'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" NavigateTool(name='navigate_browser', description='Navigate a browser to the specified URL', args_schema=<class 'langchain.tools.playwright.navigate.NavigateToolInput'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" NavigateBackTool(name='previous_webpage', description='Navigate back to the previous page in the browser history', args_schema=<class 'pydantic.main.BaseModel'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" ExtractTextTool(name='extract_text', description='Extract all the text on the current webpage', args_schema=<class 'pydantic.main.BaseModel'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" ExtractHyperlinksTool(name='extract_hyperlinks', description='Extract all hyperlinks on the current webpage', args_schema=<class 'langchain.tools.playwright.extract_hyperlinks.ExtractHyperlinksToolInput'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" GetElementsTool(name='get_elements', description='Retrieve elements in the current web page matching the given CSS selector', args_schema=<class 'langchain.tools.playwright.get_elements.GetElementsToolInput'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>),\n",
" CurrentWebPageTool(name='current_webpage', description='Returns the URL of the current page', args_schema=<class 'pydantic.main.BaseModel'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, sync_browser=None, async_browser=<Browser type=<BrowserType name=chromium executable_path=/Users/wfh/Library/Caches/ms-playwright/chromium-1055/chrome-mac/Chromium.app/Contents/MacOS/Chromium> version=112.0.5615.29>)]"
]
},
"execution_count": 4,
Expand All @@ -98,7 +104,9 @@
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"tools_by_name = {tool.name: tool for tool in tools}\n",
Expand All @@ -109,7 +117,9 @@
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
Expand All @@ -129,7 +139,9 @@
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
Expand All @@ -150,7 +162,9 @@
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
Expand All @@ -168,6 +182,117 @@
"await tools_by_name['current_webpage'].arun({})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use within an Agent\n",
"\n",
"Several of the browser tools are `StructuredTool`s, meaning they expect multiple arguments. These aren't compatible (out of the box) with agents older than the `STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION` agent."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.agents import initialize_agent, AgentType\n",
"from langchain.chat_models import ChatAnthropic\n",
"\n",
"llm = ChatAnthropic(temperature=0) # or any other LLM, e.g., ChatOpenAI(), OpenAI()\n",
"\n",
"agent_chain = initialize_agent(tools, llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m Thought: I need to navigate to langchain.com to see the headers\n",
"Action: \n",
"```\n",
"{\n",
" \"action\": \"navigate_browser\",\n",
" \"action_input\": \"https://langchain.com/\"\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3mNavigating to https://langchain.com/ returned status code 200\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m Action:\n",
"```\n",
"{\n",
" \"action\": \"get_elements\",\n",
" \"action_input\": {\n",
" \"selector\": \"h1, h2, h3, h4, h5, h6\"\n",
" } \n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3m[]\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m Thought: The page has loaded, I can now extract the headers\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"get_elements\",\n",
" \"action_input\": {\n",
" \"selector\": \"h1, h2, h3, h4, h5, h6\"\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3m[]\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m Thought: I need to navigate to langchain.com to see the headers\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"navigate_browser\",\n",
" \"action_input\": \"https://langchain.com/\"\n",
"}\n",
"```\n",
"\n",
"\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3mNavigating to https://langchain.com/ returned status code 200\u001b[0m\n",
"Thought:\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"The headers on langchain.com are:\n",
"\n",
"h1: Langchain - Decentralized Translation Protocol \n",
"h2: A protocol for decentralized translation \n",
"h3: How it works\n",
"h3: The Problem\n",
"h3: The Solution\n",
"h3: Key Features\n",
"h3: Roadmap\n",
"h3: Team\n",
"h3: Advisors\n",
"h3: Partners\n",
"h3: FAQ\n",
"h3: Contact Us\n",
"h3: Subscribe for updates\n",
"h3: Follow us on social media \n",
"h3: Langchain Foundation Ltd. All rights reserved.\n",
"\n"
]
}
],
"source": [
"result = await agent_chain.arun(\"What are the headers on langchain.com?\")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
2 changes: 2 additions & 0 deletions langchain/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from langchain.agents.mrkl.base import MRKLChain, ZeroShotAgent
from langchain.agents.react.base import ReActChain, ReActTextWorldAgent
from langchain.agents.self_ask_with_search.base import SelfAskWithSearchChain
from langchain.agents.structured_chat.base import StructuredChatAgent
from langchain.agents.tools import Tool, tool

__all__ = [
Expand All @@ -43,6 +44,7 @@
"ReActChain",
"ReActTextWorldAgent",
"SelfAskWithSearchChain",
"StructuredChatAgent",
"Tool",
"ZeroShotAgent",
"create_csv_agent",
Expand Down
3 changes: 3 additions & 0 deletions langchain/agents/agent_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ class AgentType(str, Enum):
CONVERSATIONAL_REACT_DESCRIPTION = "conversational-react-description"
CHAT_ZERO_SHOT_REACT_DESCRIPTION = "chat-zero-shot-react-description"
CHAT_CONVERSATIONAL_REACT_DESCRIPTION = "chat-conversational-react-description"
STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION = (
"structured-chat-zero-shot-react-description"
)
Empty file.
130 changes: 130 additions & 0 deletions langchain/agents/structured_chat/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import re
from typing import Any, List, Optional, Sequence, Tuple

from pydantic import Field

from langchain.agents.agent import Agent, AgentOutputParser
from langchain.agents.structured_chat.output_parser import (
StructuredChatOutputParser,
StructuredChatOutputParserWithRetries,
)
from langchain.agents.structured_chat.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.prompts.base import BasePromptTemplate
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import AgentAction
from langchain.tools import BaseTool


class StructuredChatAgent(Agent):
    """Chat agent whose prompt lists each tool together with its args schema.

    The prompt is assembled in ``create_prompt`` and the agent itself is
    normally built through ``from_llm_and_tools``.
    """

    # Parser applied to the LLM output; a plain StructuredChatOutputParser is
    # created when none is supplied to the constructor.
    output_parser: AgentOutputParser = Field(default_factory=StructuredChatOutputParser)

    @property
    def observation_prefix(self) -> str:
        """Return the label written in front of an observation."""
        return "Observation: "

    @property
    def llm_prefix(self) -> str:
        """Return the label written in front of an LLM call."""
        return "Thought:"

    def _construct_scratchpad(
        self, intermediate_steps: List[Tuple[AgentAction, str]]
    ) -> str:
        """Build the scratchpad text from the steps taken so far.

        Delegates to the parent implementation and, when the result is
        non-empty, prepends a fixed notice to it.

        Args:
            intermediate_steps: ``(action, observation)`` pairs forwarded
                unchanged to the parent implementation.

        Returns:
            The parent's scratchpad prefixed with the notice, or the parent's
            (empty) scratchpad unchanged.

        Raises:
            ValueError: If the parent implementation does not return a string.
        """
        scratchpad = super()._construct_scratchpad(intermediate_steps)
        if not isinstance(scratchpad, str):
            raise ValueError("agent_scratchpad should be of type string.")
        if not scratchpad:
            # Nothing has happened yet: hand back the empty scratchpad as-is.
            return scratchpad
        notice = (
            "This was your previous work "
            "(but I haven't seen any of it! I only see what "
            "you return as final answer):\n"
        )
        return f"{notice}{scratchpad}"

    @classmethod
    def _validate_tools(cls, tools: Sequence[BaseTool]) -> None:
        """Accept any tools; this override performs no validation."""
        pass

    @classmethod
    def _get_default_output_parser(
        cls, llm: Optional[BaseLanguageModel] = None, **kwargs: Any
    ) -> AgentOutputParser:
        """Return the default parser, built from ``llm`` via ``from_llm``.

        Extra keyword arguments are accepted but not used.
        """
        return StructuredChatOutputParserWithRetries.from_llm(llm=llm)

    @property
    def _stop(self) -> List[str]:
        """Return the one-element list holding the ``Observation:`` marker."""
        return ["Observation:"]

    @classmethod
    def create_prompt(
        cls,
        tools: Sequence[BaseTool],
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
        input_variables: Optional[List[str]] = None,
    ) -> BasePromptTemplate:
        """Assemble the chat prompt describing the available tools.

        Args:
            tools: Tools to describe; each contributes one line made of its
                name, description and brace-escaped args.
            prefix: Text placed before the tool descriptions.
            suffix: Text placed after the format instructions.
            format_instructions: Template formatted with ``tool_names``.
            input_variables: Prompt input variables; defaults to
                ``["input", "agent_scratchpad"]`` when ``None``.

        Returns:
            A ``ChatPromptTemplate`` with one system and one human message.
        """
        tool_lines = []
        for tool in tools:
            # Every brace in the args text is quadrupled -- presumably so the
            # literal braces survive later template formatting (TODO confirm
            # how many formatting passes the template goes through).
            opened = re.sub("{", "{{{{", str(tool.args))
            escaped_args = re.sub("}", "}}}}", opened)
            tool_lines.append(f"{tool.name}: {tool.description}, args: {escaped_args}")
        tools_section = "\n".join(tool_lines)

        names_csv = ", ".join([tool.name for tool in tools])
        instructions = format_instructions.format(tool_names=names_csv)

        system_template = "\n\n".join([prefix, tools_section, instructions, suffix])
        system_message = SystemMessagePromptTemplate.from_template(system_template)
        human_message = HumanMessagePromptTemplate.from_template(
            "{input}\n\n{agent_scratchpad}"
        )

        if input_variables is None:
            input_variables = ["input", "agent_scratchpad"]
        return ChatPromptTemplate(
            input_variables=input_variables,
            messages=[system_message, human_message],
        )

    @classmethod
    def from_llm_and_tools(
        cls,
        llm: BaseLanguageModel,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        output_parser: Optional[AgentOutputParser] = None,
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
        input_variables: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Agent:
        """Construct an agent from an LLM and tools.

        Args:
            llm: Language model used for the LLM chain and, when no parser is
                given, for the default output parser.
            tools: Tools the agent may call; their names become
                ``allowed_tools``.
            callback_manager: Passed through to the ``LLMChain``.
            output_parser: Parser to use; when falsy, the default parser from
                ``_get_default_output_parser`` is used instead.
            prefix: Forwarded to ``create_prompt``.
            suffix: Forwarded to ``create_prompt``.
            format_instructions: Forwarded to ``create_prompt``.
            input_variables: Forwarded to ``create_prompt``.
            **kwargs: Extra keyword arguments for the class constructor.

        Returns:
            The constructed agent instance.
        """
        cls._validate_tools(tools)
        chain = LLMChain(
            llm=llm,
            prompt=cls.create_prompt(
                tools,
                prefix=prefix,
                suffix=suffix,
                format_instructions=format_instructions,
                input_variables=input_variables,
            ),
            callback_manager=callback_manager,
        )
        allowed = [tool.name for tool in tools]
        if output_parser:
            parser = output_parser
        else:
            parser = cls._get_default_output_parser(llm=llm)
        return cls(
            llm_chain=chain,
            allowed_tools=allowed,
            output_parser=parser,
            **kwargs,
        )

    @property
    def _agent_type(self) -> str:
        """Always raise ``ValueError``; no agent type string is provided."""
        raise ValueError
Loading

0 comments on commit c582f2e

Please sign in to comment.