feat!: Add support for model family specification (#4856)
* Add support for model family specification
* spelling mistake
* lint, etc
* fixes
jackgerrits authored Dec 30, 2024
1 parent 190fcd1 commit cb1633b
Showing 13 changed files with 152 additions and 58 deletions.
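
For orientation, here is a minimal sketch of the API this commit introduces, assembled from the diffs below; the model and api_key values are illustrative, and construction details beyond model_info are assumptions:

from autogen_core.models import ModelFamily, ModelInfo
from autogen_ext.models.openai import OpenAIChatCompletionClient

# ModelInfo now carries a required "family" field alongside the existing
# vision / function_calling / json_output flags.
info: ModelInfo = {
    "vision": True,
    "function_calling": True,
    "json_output": True,
    "family": ModelFamily.GPT_4O,
}

# The model_info argument replaces the deprecated model_capabilities.
client = OpenAIChatCompletionClient(model="gpt-4o", api_key="sk-...", model_info=info)
print(client.model_info["family"])  # "gpt-4o"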
@@ -262,7 +262,7 @@ def __init__(
         self._system_messages = [SystemMessage(content=system_message)]
         self._tools: List[Tool] = []
         if tools is not None:
-            if model_client.capabilities["function_calling"] is False:
+            if model_client.model_info["function_calling"] is False:
                 raise ValueError("The model does not support function calling.")
             for tool in tools:
                 if isinstance(tool, Tool):
@@ -283,7 +283,7 @@ def __init__(
         self._handoff_tools: List[Tool] = []
         self._handoffs: Dict[str, HandoffBase] = {}
         if handoffs is not None:
-            if model_client.capabilities["function_calling"] is False:
+            if model_client.model_info["function_calling"] is False:
                 raise ValueError("The model does not support function calling, which is needed for handoffs.")
             for handoff in handoffs:
                 if isinstance(handoff, str):
@@ -331,7 +331,7 @@ async def on_messages_stream(
     ) -> AsyncGenerator[AgentEvent | ChatMessage | Response, None]:
         # Add messages to the model context.
         for msg in messages:
-            if isinstance(msg, MultiModalMessage) and self._model_client.capabilities["vision"] is False:
+            if isinstance(msg, MultiModalMessage) and self._model_client.model_info["vision"] is False:
                 raise ValueError("The model does not support vision.")
             await self._model_context.add_message(UserMessage(content=msg.content, source=msg.source))
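
Beyond these boolean checks, the new family field allows family-specific branching in downstream code; a hypothetical sketch, not part of this commit:

from autogen_core.models import ModelFamily

# Hypothetical: the capability table added by this commit marks o1-family
# models as function_calling=False, so a caller could special-case them.
if model_client.model_info["family"] == ModelFamily.O1:
    ...  # e.g., skip tool registration for o1-style models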
@@ -19,6 +19,7 @@
 from autogen_core import Image
 from autogen_core.model_context import BufferedChatCompletionContext
 from autogen_core.models import LLMMessage
+from autogen_core.models._model_client import ModelFamily
 from autogen_core.tools import FunctionTool
 from autogen_ext.models.openai import OpenAIChatCompletionClient
 from openai.resources.chat.completions import AsyncCompletions
@@ -387,11 +388,7 @@ async def test_invalid_model_capabilities() -> None:
     model_client = OpenAIChatCompletionClient(
         model=model,
         api_key="",
-        model_capabilities={
-            "vision": False,
-            "function_calling": False,
-            "json_output": False,
-        },
+        model_info={"vision": False, "function_calling": False, "json_output": False, "family": ModelFamily.UNKNOWN},
     )

     with pytest.raises(ValueError):
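
The updated test doubles as a migration example for client construction; a before/after sketch with illustrative model and api_key values:

from autogen_core.models import ModelFamily
from autogen_ext.models.openai import OpenAIChatCompletionClient

# Before (deprecated argument):
client = OpenAIChatCompletionClient(
    model="some-unknown-model",
    api_key="",
    model_capabilities={"vision": False, "function_calling": False, "json_output": False},
)

# After: model_info replaces model_capabilities and adds the required family.
client = OpenAIChatCompletionClient(
    model="some-unknown-model",
    api_key="",
    model_info={"vision": False, "function_calling": False, "json_output": False, "family": ModelFamily.UNKNOWN},
)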
@@ -1,4 +1,4 @@
-from ._model_client import ChatCompletionClient, ModelCapabilities
+from ._model_client import ChatCompletionClient, ModelCapabilities, ModelFamily, ModelInfo  # type: ignore
 from ._types import (
     AssistantMessage,
     ChatCompletionTokenLogprob,
@@ -27,4 +27,6 @@
     "CreateResult",
     "TopLogprob",
     "ChatCompletionTokenLogprob",
+    "ModelFamily",
+    "ModelInfo",
 ]
@@ -1,28 +1,52 @@
 from __future__ import annotations

+import warnings
 from abc import ABC, abstractmethod
-from typing import Mapping, Optional, Sequence
+from typing import Literal, Mapping, Optional, Sequence, TypeAlias

-from typing_extensions import (
-    Any,
-    AsyncGenerator,
-    Required,
-    TypedDict,
-    Union,
-)
+from typing_extensions import Any, AsyncGenerator, Required, TypedDict, Union, deprecated

 from .. import CancellationToken
 from .._component_config import ComponentLoader
 from ..tools import Tool, ToolSchema
 from ._types import CreateResult, LLMMessage, RequestUsage


+class ModelFamily:
+    """A model family is a group of models that share similar characteristics from a capabilities perspective. This is different to discrete supported features such as vision, function calling, and JSON output.
+
+    This namespace class holds constants for the model families that AutoGen understands. Other families definitely exist and can be represented by a string, however, AutoGen will treat them as unknown."""
+
+    GPT_4O = "gpt-4o"
+    O1 = "o1"
+    GPT_4 = "gpt-4"
+    GPT_35 = "gpt-35"
+    UNKNOWN = "unknown"
+
+    ANY: TypeAlias = Literal["gpt-4o", "o1", "gpt-4", "gpt-35", "unknown"]
+
+    def __new__(cls, *args: Any, **kwargs: Any) -> ModelFamily:
+        raise TypeError(f"{cls.__name__} is a namespace class and cannot be instantiated.")
+
+
+@deprecated("Use the ModelInfo class instead ModelCapabilities.")
 class ModelCapabilities(TypedDict, total=False):
     vision: Required[bool]
     function_calling: Required[bool]
     json_output: Required[bool]


+class ModelInfo(TypedDict, total=False):
+    vision: Required[bool]
+    """True if the model supports vision, aka image input, otherwise False."""
+    function_calling: Required[bool]
+    """True if the model supports function calling, otherwise False."""
+    json_output: Required[bool]
+    """True if the model supports json output, otherwise False. Note: this is different to structured json."""
+    family: Required[ModelFamily.ANY | str]
+    """Model family should be one of the constants from :py:class:`ModelFamily` or a string representing an unknown model family."""
+
+
 class ChatCompletionClient(ABC, ComponentLoader):
     # Caching has to be handled internally as they can depend on the create args that were stored in the constructor
     @abstractmethod
@@ -63,6 +87,18 @@ def count_tokens(self, messages: Sequence[LLMMessage], *, tools: Sequence[Tool |
     @abstractmethod
     def remaining_tokens(self, messages: Sequence[LLMMessage], *, tools: Sequence[Tool | ToolSchema] = []) -> int: ...

-    @property
-    @abstractmethod
-    def capabilities(self) -> ModelCapabilities: ...
+    # Deprecated
+    @property
+    @abstractmethod
+    def capabilities(self) -> ModelCapabilities: ...  # type: ignore
+
+    @property
+    def model_info(self) -> ModelInfo:
+        warnings.warn(
+            "Model client in use does not implement model_info property. Falling back to capabilities property. The capabilities property is deprecated and will be removed soon, please implement model_info instead in the model client class.",
+            stacklevel=2,
+        )
+        base_info: ModelInfo = self.capabilities  # type: ignore
+        base_info["family"] = ModelFamily.UNKNOWN
+        return base_info
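
A short sketch of how the new types behave, using only names defined in the diff above:

from autogen_core.models import ModelFamily, ModelInfo

print(ModelFamily.GPT_4O)  # "gpt-4o"

# ModelFamily is a namespace class; instantiating it raises TypeError.
try:
    ModelFamily()
except TypeError as e:
    print(e)  # ModelFamily is a namespace class and cannot be instantiated.

# Families AutoGen does not recognize are plain strings, permitted because
# the family field is typed as ModelFamily.ANY | str.
info: ModelInfo = {
    "vision": False,
    "function_calling": True,
    "json_output": True,
    "family": "my-custom-family",
}

Note the compatibility path: a client that only implements the deprecated capabilities property inherits the model_info fallback above, which copies the flags, emits a warning, and reports the family as ModelFamily.UNKNOWN.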
11 changes: 8 additions & 3 deletions python/packages/autogen-core/tests/test_tool_agent.py
@@ -11,10 +11,11 @@
     FunctionExecutionResult,
     FunctionExecutionResultMessage,
     LLMMessage,
-    ModelCapabilities,
+    ModelCapabilities,  # type: ignore
     RequestUsage,
     UserMessage,
 )
+from autogen_core.models._model_client import ModelFamily, ModelInfo
 from autogen_core.tool_agent import (
     InvalidToolArgumentsException,
     ToolAgent,
@@ -138,8 +139,12 @@ def remaining_tokens(self, messages: Sequence[LLMMessage], *, tools: Sequence[To
             return 0

         @property
-        def capabilities(self) -> ModelCapabilities:
-            return ModelCapabilities(vision=False, function_calling=True, json_output=False)
+        def capabilities(self) -> ModelCapabilities:  # type: ignore
+            return ModelCapabilities(vision=False, function_calling=True, json_output=False)  # type: ignore
+
+        @property
+        def model_info(self) -> ModelInfo:
+            return ModelInfo(vision=False, function_calling=True, json_output=False, family=ModelFamily.UNKNOWN)

     client = MockChatCompletionClient()
     tools: List[Tool] = [FunctionTool(_pass_function, name="pass", description="Pass function")]
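
With both properties implemented, the mock resolves through either accessor; a quick sketch using the names from this test:

client = MockChatCompletionClient()
assert client.model_info["function_calling"] is True
assert client.model_info["family"] == ModelFamily.UNKNOWN  # "unknown"

A client that overrides only capabilities still works during the deprecation window via the base-class fallback shown earlier, at the cost of a runtime warning.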
@@ -183,11 +183,11 @@ def __init__(
             raise ValueError(
                 "Cannot save screenshots without a debug directory. Set it using the 'debug_dir' parameter. The debug directory is created if it does not exist."
             )
-        if model_client.capabilities["function_calling"] is False:
+        if model_client.model_info["function_calling"] is False:
             raise ValueError(
                 "The model does not support function calling. MultimodalWebSurfer requires a model that supports function calling."
             )
-        if model_client.capabilities["vision"] is False:
+        if model_client.model_info["vision"] is False:
             raise ValueError("The model is not multimodal. MultimodalWebSurfer requires a multimodal model.")
         self._model_client = model_client
         self.headless = headless
@@ -1,6 +1,6 @@
 from typing import Dict

-from autogen_core.models import ModelCapabilities
+from autogen_core.models import ModelFamily, ModelInfo

 # Based on: https://platform.openai.com/docs/models/continuous-model-upgrades
 # This is a moving target, so correctness is checked by the model value returned by openai against expected values at runtime
@@ -17,86 +17,102 @@
     "gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613",
 }

-_MODEL_CAPABILITIES: Dict[str, ModelCapabilities] = {
+_MODEL_INFO: Dict[str, ModelInfo] = {
     "o1-preview-2024-09-12": {
         "vision": False,
         "function_calling": False,
         "json_output": False,
+        "family": ModelFamily.O1,
     },
     "o1-mini-2024-09-12": {
         "vision": False,
         "function_calling": False,
         "json_output": False,
+        "family": ModelFamily.O1,
     },
     "gpt-4o-2024-08-06": {
         "vision": True,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_4O,
     },
     "gpt-4o-2024-05-13": {
         "vision": True,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_4O,
     },
     "gpt-4o-mini-2024-07-18": {
         "vision": True,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_4O,
     },
     "gpt-4-turbo-2024-04-09": {
         "vision": True,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_4,
     },
     "gpt-4-0125-preview": {
         "vision": False,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_4,
     },
     "gpt-4-1106-preview": {
         "vision": False,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_4,
     },
     "gpt-4-1106-vision-preview": {
         "vision": True,
         "function_calling": False,
         "json_output": False,
+        "family": ModelFamily.GPT_4,
     },
     "gpt-4-0613": {
         "vision": False,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_4,
     },
     "gpt-4-32k-0613": {
         "vision": False,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_4,
     },
     "gpt-3.5-turbo-0125": {
         "vision": False,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_35,
     },
     "gpt-3.5-turbo-1106": {
         "vision": False,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_35,
     },
     "gpt-3.5-turbo-instruct": {
         "vision": False,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_35,
     },
     "gpt-3.5-turbo-0613": {
         "vision": False,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_35,
     },
     "gpt-3.5-turbo-16k-0613": {
         "vision": False,
         "function_calling": True,
         "json_output": True,
+        "family": ModelFamily.GPT_35,
     },
 }

@@ -126,9 +142,9 @@ def resolve_model(model: str) -> str:
     return model


-def get_capabilities(model: str) -> ModelCapabilities:
+def get_info(model: str) -> ModelInfo:
     resolved_model = resolve_model(model)
-    return _MODEL_CAPABILITIES[resolved_model]
+    return _MODEL_INFO[resolved_model]


 def get_token_limit(model: str) -> int:
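
A usage sketch for the renamed helpers; the import path is an assumption (the file name is not shown in this diff), and the lookups follow from the tables above:

from autogen_ext.models.openai._model_info import get_info, resolve_model

# "gpt-3.5-turbo-16k" appears as an alias in the pointer table above.
print(resolve_model("gpt-3.5-turbo-16k"))  # "gpt-3.5-turbo-16k-0613"

info = get_info("gpt-3.5-turbo-16k")
print(info["family"])            # "gpt-35" (ModelFamily.GPT_35)
print(info["function_calling"])  # True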