From 7da80e93615077b5beec174a9de46ed0bad3ca9f Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Thu, 24 Oct 2024 07:13:37 +1000 Subject: [PATCH 01/12] initial vectordb storage --- python/packages/autogen-ext/pyproject.toml | 7 +- .../src/autogen_ext/storage/__init__.py | 4 + .../src/autogen_ext/storage/_base.py | 371 +++++++ .../src/autogen_ext/storage/_chromadb.py | 688 +++++++++++++ .../src/autogen_ext/storage/_factory.py | 33 + .../src/autogen_ext/storage/_utils.py | 93 ++ .../tests/storage/test_chroma_db.py | 80 ++ python/uv.lock | 937 +++++++++++++++++- 8 files changed, 2199 insertions(+), 14 deletions(-) create mode 100644 python/packages/autogen-ext/src/autogen_ext/storage/__init__.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/storage/_base.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/storage/_factory.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/storage/_utils.py create mode 100644 python/packages/autogen-ext/tests/storage/test_chroma_db.py diff --git a/python/packages/autogen-ext/pyproject.toml b/python/packages/autogen-ext/pyproject.toml index f13843aabcf7..8d16a40c8e8d 100644 --- a/python/packages/autogen-ext/pyproject.toml +++ b/python/packages/autogen-ext/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "autogen-ext" -version = "0.4.0.dev2" +version = "0.4.0dev1" license = {file = "LICENSE-CODE"} description = "AutoGen extensions library" readme = "README.md" @@ -15,7 +15,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "autogen-core==0.4.0.dev2", + "autogen-core==0.4.0dev1", ] @@ -27,6 +27,7 @@ langchain = ["langchain_core~= 0.3.3"] azure = ["azure-core", "azure-identity"] docker = ["docker~=7.0"] openai = ["openai>=1.3"] +chromadb = ["chromadb~=0.5.15", "sentence-transformers"] [tool.hatch.build.targets.wheel] packages = ["src/autogen_ext"] 
@runtime_checkable
class AsyncVectorDB(Protocol):
    """
    Structural protocol for an asynchronous vector database.

    A vector database is responsible for storing and retrieving documents. Every method declared
    here is a coroutine stub (its body is ``...``); concrete backends provide the behaviour.

    Attributes:
        active_collection: Any | The active collection in the vector database. Makes get_collection faster.
            Default is None.
        type: str | The type of the vector database, chroma, pgvector, etc. Default is "".
        embedding_function: Optional[Callable[[List[str]], List[List[float]]]] | Callable that turns a list of
            sentences into a list of embeddings. Default is None.

    Methods:
        create_collection: Create a collection in the vector database.
        get_collection: Get a collection from the vector database.
        delete_collection: Delete a collection from the vector database.
        insert_docs: Insert documents into a collection of the vector database.
        update_docs: Update documents in a collection of the vector database.
        delete_docs: Delete documents from a collection of the vector database.
        retrieve_docs: Retrieve documents from a collection based on text queries.
        get_docs_by_ids: Retrieve documents from a collection based on their ids.
    """

    active_collection: Any = None
    type: str = ""
    embedding_function: Optional[Callable[[List[str]], List[List[float]]]] = (
        None  # embeddings = embedding_function(sentences)
    )

    async def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> Any:
        """
        Create a collection in the vector database.

        Case 1. If the collection does not exist, create the collection.
        Case 2. If the collection exists and overwrite is True, overwrite the collection.
        Case 3. If the collection exists and overwrite is False, return the existing collection when
            get_or_create is True; otherwise raise a ValueError.

        Args:
            collection_name: str | The name of the collection.
            overwrite: bool | Whether to overwrite the collection if it exists. Default is False.
            get_or_create: bool | Whether to get the collection if it exists. Default is True.

        Returns:
            Any | The collection object.
        """
        ...

    async def get_collection(self, collection_name: Optional[str] = None) -> Any:
        """
        Get the collection from the vector database.

        Args:
            collection_name: Optional[str] | The name of the collection. Default is None.
                If None, return the current active collection.

        Returns:
            Any | The collection object.
        """
        ...

    async def delete_collection(self, collection_name: str) -> Any:
        """
        Delete the collection from the vector database.

        Args:
            collection_name: str | The name of the collection.

        Returns:
            Any
        """
        ...

    async def insert_docs(
        self,
        docs: Sequence[Document],
        collection_name: Optional[str] = None,
        upsert: bool = False,
        **kwargs: Any,
    ) -> None:
        """
        Insert documents into the collection of the vector database.

        Args:
            docs: Sequence[Document] | The documents to insert. Each document is a Pydantic Document model.
            collection_name: Optional[str] | The name of the collection. Default is None.
            upsert: bool | Whether to update the document if it exists. Default is False.
            kwargs: Dict[str, Any] | Additional keyword arguments.

        Returns:
            None
        """
        ...

    async def update_docs(self, docs: Sequence[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None:
        """
        Update documents in the collection of the vector database.

        Args:
            docs: Sequence[Document] | The documents to update.
            collection_name: Optional[str] | The name of the collection. Default is None.
            kwargs: Dict[str, Any] | Additional keyword arguments.

        Returns:
            None
        """
        ...

    async def delete_docs(self, ids: Sequence[ItemID], collection_name: Optional[str] = None, **kwargs: Any) -> None:
        """
        Delete documents from the collection of the vector database.

        Args:
            ids: Sequence[ItemID] | The document ids. Each id is a typed `ItemID` (str or int).
            collection_name: Optional[str] | The name of the collection. Default is None.
            kwargs: Dict[str, Any] | Additional keyword arguments.

        Returns:
            None
        """
        ...

    async def retrieve_docs(
        self,
        queries: Sequence[str],
        collection_name: Optional[str] = None,
        n_results: int = 10,
        distance_threshold: float = -1,
        **kwargs: Any,
    ) -> QueryResults:
        """
        Retrieve documents from the collection of the vector database based on the queries.

        Args:
            queries: Sequence[str] | The queries. Each query is a string.
            collection_name: Optional[str] | The name of the collection. Default is None.
            n_results: int | The number of relevant documents to return. Default is 10.
            distance_threshold: float | The threshold for the distance score; only results with a smaller
                distance are returned. No filtering is applied if it is < 0. Default is -1.
            kwargs: Dict[str, Any] | Additional keyword arguments.

        Returns:
            QueryResults | One entry per query; each entry is a list of (document, distance) tuples.
        """
        ...

    async def get_docs_by_ids(
        self,
        ids: Optional[Sequence[ItemID]] = None,
        collection_name: Optional[str] = None,
        include: Optional[Sequence[str]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """
        Retrieve documents from the collection of the vector database based on the ids.

        Args:
            ids: Optional[Sequence[ItemID]] | The document ids. If None, all documents are returned.
                Default is None.
            collection_name: Optional[str] | The name of the collection. Default is None.
            include: Optional[Sequence[str]] | The fields to include. Default is None.
                If None, ["metadatas", "documents"] are included; ids are always included. This may differ
                depending on the implementation.
            kwargs: Dict[str, Any] | Additional keyword arguments.

        Returns:
            List[Document] | The results.
        """
        ...
the collection exists and overwrite is False, if get_or_create is True, it will get the collection, + otherwise it raise a ValueError. + + Args: + collection_name: str | The name of the collection. + overwrite: bool | Whether to overwrite the collection if it exists. Default is False. + get_or_create: bool | Whether to get the collection if it exists. Default is True. + + Returns: + Any | The collection object. + """ + ... + + def get_collection(self, collection_name: Optional[str] = None) -> Any: + """ + Get the collection from the vector database. + + Args: + collection_name: Optional[str] | The name of the collection. Default is None. + If None, return the current active collection. + + Returns: + Any | The collection object. + """ + ... + + def delete_collection(self, collection_name: str) -> Any: + """ + Delete the collection from the vector database. + + Args: + collection_name: str | The name of the collection. + + Returns: + Any + """ + ... + + def insert_docs( + self, + docs: Sequence[Document], + collection_name: Optional[str] = None, + upsert: bool = False, + **kwargs: Any, + ) -> None: + """ + Insert documents into the collection of the vector database. + + Args: + docs: List[Document] | A list of documents. Each document is a Pydantic Document model. + collection_name: Optional[str] | The name of the collection. Default is None. + upsert: bool | Whether to update the document if it exists. Default is False. + kwargs: Dict[str, Any] | Additional keyword arguments. + + Returns: + None + """ + ... + + def update_docs(self, docs: Sequence[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: + """ + Update documents in the collection of the vector database. + + Args: + docs: List[Document] | A list of documents. + collection_name: Optional[str] | The name of the collection. Default is None. + kwargs: Dict[str, Any] | Additional keyword arguments. + + Returns: + None + """ + ... 
+ + def delete_docs(self, ids: Sequence[ItemID], collection_name: Optional[str] = None, **kwargs: Any) -> None: + """ + Delete documents from the collection of the vector database. + + Args: + ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`. + collection_name: Optional[str] | The name of the collection. Default is None. + kwargs: Dict[str, Any] | Additional keyword arguments. + + Returns: + None + """ + ... + + def retrieve_docs( + self, + queries: Sequence[str], + collection_name: Optional[str] = None, + n_results: int = 10, + distance_threshold: float = -1, + **kwargs: Any, + ) -> QueryResults: + """ + Retrieve documents from the collection of the vector database based on the queries. + + Args: + queries: List[str] | A list of queries. Each query is a string. + collection_name: Optional[str] | The name of the collection. Default is None. + n_results: int | The number of relevant documents to return. Default is 10. + distance_threshold: float | The threshold for the distance score, only distance smaller than it will be + returned. Don't filter with it if < 0. Default is -1. + kwargs: Dict[str, Any] | Additional keyword arguments. + + Returns: + QueryResults | The query results. Each query result is a list of list of tuples containing the document and + the distance. + """ + ... + + def get_docs_by_ids( + self, + ids: Optional[Sequence[ItemID]] = None, + collection_name: Optional[str] = None, + include: Optional[List[str]] = None, + **kwargs: Any, + ) -> List[Document]: + """ + Retrieve documents from the collection of the vector database based on the ids. + + Args: + ids: Optional[List[ItemID]] | A list of document ids. If None, will return all the documents. Default is None. + collection_name: Optional[str] | The name of the collection. Default is None. + include: Optional[List[str]] | The fields to include. Default is None. + If None, will include ["metadatas", "documents"], ids will always be included. 
class ChromaVectorDB(VectorDB):
    """
    A synchronous vector database that uses ChromaDB as the backend.

    Document ids are always handed to Chroma as strings (``str(id)``): on insert, on delete and
    on lookup, so an integer ``ItemID`` addresses the same record in every operation.

    .. note::

        This class requires the :code:`chromadb` extra for the :code:`autogen-ext` package.
    """

    ChromaError = Exception  # Default to Exception if chromadb is not installed

    # Plural keys of a Chroma ``get`` result mapped onto the matching ``Document`` field name.
    # "documents" must become "content"; naively stripping the trailing "s" would yield "document",
    # which is not a ``Document`` field.
    _RESULT_KEY_TO_FIELD: Dict[str, str] = {
        "ids": "id",
        "documents": "content",
        "metadatas": "metadata",
        "embeddings": "embedding",
    }

    def __init__(
        self,
        *,
        client: Optional["Client"] = None,
        path: Optional[str] = None,
        embedding_function: Optional[Callable[[List[str]], List[List[float]]]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        client_type: str = "persistent",
        host: str = "localhost",
        port: int = 8000,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the vector database.

        Args:
            client: chromadb.Client | The client object of the vector database. Default is None.
                If provided, it is used directly and path, client_type, host, port and kwargs are ignored.
            path: Optional[str] | The path to the vector database. Required if client_type is 'persistent'.
            embedding_function: Callable | The embedding function used to generate the vector representation
                of the documents. Default is None, SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") will be used.
            metadata: dict | The metadata attached to collections created by this object. Default is None.
            client_type: str | The type of client to use. Can be 'persistent' or 'http'. Default is 'persistent'.
            host: str | The host of the HTTP server. Default is 'localhost'.
            port: int | The port of the HTTP server. Default is 8000.
            kwargs: dict | Additional keyword arguments forwarded to the chromadb client constructor.

        Raises:
            RuntimeError: If chromadb (>= 0.5.0) or its embedding utilities cannot be imported.
            ValueError: If no client is given and client_type is invalid, or is 'persistent' without a path.
        """
        try:
            import chromadb

            # Compare numerically: as plain strings "0.10.0" would sort before "0.5.0".
            if self._parse_version(chromadb.__version__) < [0, 5, 0]:
                raise ImportError("Please upgrade chromadb to version 0.5.0 or later.")
            import chromadb.utils.embedding_functions as ef
            from chromadb.errors import ChromaError

            ChromaVectorDB.ChromaError = ChromaError  # Set the class attribute
        except ImportError as e:
            raise RuntimeError(
                "Missing dependencies for ChromaVectorDB. Please ensure the autogen-ext package was installed with the 'chromadb' extra."
            ) from e

        # Validate the arguments and build the client first, so a bad configuration fails before
        # the default embedding function is constructed.
        if client is None:
            if client_type == "persistent":
                if path is None:
                    raise ValueError("Persistent client requires a 'path' to save the database.")
                client = chromadb.PersistentClient(path=path, **kwargs)
            elif client_type == "http":
                client = chromadb.HttpClient(host=host, port=port, **kwargs)
            else:
                raise ValueError(f"Invalid client_type: {client_type}")

        self.client: "Client" = client
        self.embedding_function = (
            ef.SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2")
            if embedding_function is None
            else embedding_function
        )
        self.metadata = metadata if metadata else {}
        self.type = "chroma"
        self.active_collection: Optional["Collection"] = None

    @staticmethod
    def _parse_version(version: str) -> List[int]:
        """Return the leading numeric (major, minor, patch) components of a dotted version string.

        Non-numeric suffixes such as ``rc1`` are dropped and missing components count as 0, so
        ``"0.5"`` -> ``[0, 5, 0]`` and ``"0.10.1rc1"`` -> ``[0, 10, 1]``.
        """
        numbers: List[int] = []
        for piece in version.split(".")[:3]:
            digits = ""
            for char in piece:
                if not char.isdigit():
                    break
                digits += char
            numbers.append(int(digits) if digits else 0)
        numbers.extend([0] * (3 - len(numbers)))
        return numbers

    def _new_collection(self, collection_name: str) -> "Collection":
        """Create a collection with this object's embedding function and metadata."""
        return self.client.create_collection(
            name=collection_name,
            embedding_function=self.embedding_function,
            # NOTE(review): some chromadb releases appear to reject an empty metadata dict, so an
            # empty mapping is sent as None (the client's "no metadata" value) - confirm.
            metadata=self.metadata or None,
        )

    def create_collection(
        self, collection_name: str, overwrite: bool = False, get_or_create: bool = True
    ) -> "Collection":
        """
        Create a collection in the vector database.

        Case 1. If the collection does not exist, create the collection.
        Case 2. If the collection exists and overwrite is True, delete and re-create it.
        Case 3. If the collection exists and overwrite is False, return the existing collection when
            get_or_create is True; otherwise raise a ValueError.

        Args:
            collection_name: str | The name of the collection.
            overwrite: bool | Whether to overwrite the collection if it exists. Default is False.
            get_or_create: bool | Whether to get the collection if it exists. Default is True.

        Returns:
            Collection | The collection object.

        Raises:
            ValueError: If the collection exists and both overwrite and get_or_create are False.
        """
        try:
            if self.active_collection and self.active_collection.name == collection_name:
                existing = self.active_collection
            else:
                existing = self.client.get_collection(
                    name=collection_name, embedding_function=self.embedding_function
                )
        except (ValueError, ChromaVectorDB.ChromaError):
            # A failed lookup is treated as "collection does not exist".
            existing = None

        if existing is None:
            return self._new_collection(collection_name)
        if overwrite:
            self.client.delete_collection(name=collection_name)
            fresh = self._new_collection(collection_name)
            # The cached handle would otherwise keep pointing at the collection just deleted.
            if self.active_collection is not None and self.active_collection.name == collection_name:
                self.active_collection = fresh
            return fresh
        if get_or_create:
            return existing
        raise ValueError(f"Collection {collection_name} already exists.")

    def get_collection(self, collection_name: Optional[str] = None) -> "Collection":
        """
        Get the collection from the vector database and make it the active collection.

        Args:
            collection_name: Optional[str] | The name of the collection. Default is None.
                If None, return the current active collection.

        Returns:
            Collection | The collection object.

        Raises:
            ValueError: If collection_name is None and there is no active collection.
        """
        if collection_name is None:
            if self.active_collection is None:
                raise ValueError("No collection is specified.")
            logger.info(
                f"No collection is specified. Using current active collection {self.active_collection.name}."
            )
        elif not (self.active_collection and self.active_collection.name == collection_name):
            # Only hit the client when the cached collection is a different one.
            self.active_collection = self.client.get_collection(
                name=collection_name, embedding_function=self.embedding_function
            )
        return self.active_collection

    def delete_collection(self, collection_name: str) -> None:
        """
        Delete the collection from the vector database.

        The active collection is cleared when it is the one being deleted.

        Args:
            collection_name: str | The name of the collection.

        Returns:
            None
        """
        self.client.delete_collection(name=collection_name)
        if self.active_collection and self.active_collection.name == collection_name:
            self.active_collection = None

    def _batch_insert(
        self,
        collection: "Collection",
        embeddings: Optional[List[Any]] = None,
        ids: Optional[List[str]] = None,
        metadatas: Optional[List[Dict[str, Any]]] = None,
        documents: Optional[List[str]] = None,
        upsert: bool = False,
    ) -> None:
        """Add (or upsert) the given records in slices of at most CHROMADB_MAX_BATCH_SIZE ids.

        Fields that are None or empty are sent as None for every batch. Nothing is sent when
        ``ids`` is None or empty, because the batches are driven by the id list.
        """
        batch_size = CHROMADB_MAX_BATCH_SIZE
        write = collection.upsert if upsert else collection.add
        for start in range(0, len(ids or []), batch_size):
            stop = start + batch_size
            write(  # type: ignore
                documents=documents[start:stop] if documents else None,
                ids=ids[start:stop] if ids else None,
                metadatas=metadatas[start:stop] if metadatas else None,
                embeddings=embeddings[start:stop] if embeddings else None,
            )

    def insert_docs(
        self,
        docs: Sequence[Document],
        collection_name: Optional[str] = None,
        upsert: bool = False,
        **kwargs: Any,
    ) -> None:
        """
        Insert documents into the collection of the vector database.

        The first document decides which optional fields (content, embedding, metadata) are sent
        for the whole batch. An empty ``docs`` is a no-op.

        Args:
            docs: Sequence[Document] | The documents to insert. Each document is a Pydantic Document model.
            collection_name: Optional[str] | The name of the collection. Default is None.
            upsert: bool | Whether to update the document if it exists. Default is False.
            kwargs: Dict[str, Any] | Additional keyword arguments. Currently unused.

        Returns:
            None

        Raises:
            ValueError: If the first document has neither (non-empty) content nor an embedding, or no id.
        """
        if not docs:
            return
        first = docs[0]
        if first.content is None and first.embedding is None:
            raise ValueError("Either document content or embedding is required.")
        if first.id is None:
            raise ValueError("The document id is required.")
        documents = [doc.content for doc in docs] if first.content else None
        ids = [str(doc.id) for doc in docs]
        collection = self.get_collection(collection_name)
        embeddings = [doc.embedding for doc in docs] if first.embedding else None
        if not embeddings and not documents:
            raise ValueError("Either documents or embeddings must be provided.")
        metadatas = [doc.metadata for doc in docs] if first.metadata else None
        self._batch_insert(
            collection,
            embeddings=embeddings,
            ids=ids,
            metadatas=metadatas,  # type: ignore
            documents=documents,  # type: ignore
            upsert=upsert,
        )

    def update_docs(self, docs: Sequence[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None:
        """
        Update documents in the collection of the vector database.

        Implemented as ``insert_docs`` with ``upsert=True``.

        Args:
            docs: Sequence[Document] | The documents to update.
            collection_name: Optional[str] | The name of the collection. Default is None.
            kwargs: Dict[str, Any] | Additional keyword arguments, passed on to insert_docs.

        Returns:
            None
        """
        self.insert_docs(docs, collection_name=collection_name, upsert=True, **kwargs)

    def delete_docs(self, ids: Sequence[ItemID], collection_name: Optional[str] = None, **kwargs: Any) -> None:
        """
        Delete documents from the collection of the vector database.

        Args:
            ids: Sequence[ItemID] | The document ids. Each id is a typed `ItemID`.
            collection_name: Optional[str] | The name of the collection. Default is None.
            kwargs: Dict[str, Any] | Additional keyword arguments. Currently unused.

        Returns:
            None
        """
        collection = self.get_collection(collection_name)
        # insert_docs stores str(doc.id), so the same conversion is needed to match int ids.
        collection.delete(ids=[str(item_id) for item_id in ids])

    def retrieve_docs(
        self,
        queries: List[str],
        collection_name: Optional[str] = None,
        n_results: int = 10,
        distance_threshold: float = -1,
        **kwargs: Any,
    ) -> QueryResults:
        """
        Retrieve documents from the collection of the vector database based on the queries.

        Args:
            queries: List[str] | The queries. Each query is a string; a single bare string is also accepted.
            collection_name: Optional[str] | The name of the collection. Default is None.
            n_results: int | The number of relevant documents to return. Default is 10.
            distance_threshold: float | The threshold for the distance score; only results with a smaller
                distance are returned. No filtering is applied if it is < 0. Default is -1.
            kwargs: Dict[str, Any] | Additional keyword arguments. Currently not forwarded to the query.

        Returns:
            QueryResults | One entry per query; each entry is a list of (document, distance) tuples.
        """
        collection = self.get_collection(collection_name)
        if isinstance(queries, str):
            queries = [queries]
        raw = collection.query(
            query_texts=queries,
            n_results=n_results,
        )
        # Rename Chroma's "documents" key to "contents" before handing the result to the
        # conversion helper (presumably so it lines up with Document.content - see _utils).
        raw["contents"] = raw.pop("documents")
        query_results = chroma_results_to_query_results(raw)
        return filter_results_by_distance(query_results, distance_threshold)

    @staticmethod
    def _chroma_get_results_to_list_documents(data_dict: Dict[str, Any]) -> List[Document]:
        """Convert a column-oriented Chroma ``get`` result into a list of Document.

        The number of documents is the length of the first value that is not None. Known plural
        keys are renamed to Document fields (``ids`` -> ``id``, ``documents`` -> ``content``,
        ``metadatas`` -> ``metadata``, ``embeddings`` -> ``embedding``); any other key just loses
        its last character, as before.

        Args:
            data_dict: A dictionary where keys map to lists or None.

        Returns:
            List[Document] | The list of Document; empty if every value is None or the dict is empty.
        """
        present = [key for key, values in data_dict.items() if values is not None]
        if not present:
            return []

        field_names = ChromaVectorDB._RESULT_KEY_TO_FIELD
        results: List[Document] = []
        for i in range(len(data_dict[present[0]])):
            doc_dict = {
                field_names.get(key, key[:-1]): data_dict[key][i]
                for key in present
                if len(data_dict[key]) > i  # tolerate columns shorter than the first one
            }
            results.append(Document(**doc_dict))  # type: ignore
        return results

    def get_docs_by_ids(
        self,
        ids: Optional[Sequence[ItemID]] = None,
        collection_name: Optional[str] = None,
        include: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """
        Retrieve documents from the collection of the vector database based on the ids.

        Args:
            ids: Optional[Sequence[ItemID]] | The document ids. If None, all documents are returned.
                Default is None.
            collection_name: Optional[str] | The name of the collection. Default is None.
            include: Optional[List[str]] | The fields to include. Default is None.
                If None, will include ["metadatas", "documents"]. IDs are always included.
            kwargs: Dict[str, Any] | Additional keyword arguments. Currently unused.

        Returns:
            List[Document] | The results.
        """
        collection = self.get_collection(collection_name)
        if include is None:
            include = ["metadatas", "documents"]
        # Ids are stored as strings by insert_docs; convert so int ids find their records.
        lookup_ids = None if ids is None else [str(item_id) for item_id in ids]
        raw = collection.get(ids=lookup_ids, include=include)
        return self._chroma_get_results_to_list_documents(raw)
+ """ + + ChromaError = Exception # Default to Exception if chromadb is not installed + + def __init__( + self, + *, + client: "AsyncClientAPI", + embedding_function: Optional[Callable[[List[str]], List[List[float]]]] = None, + host: str = "localhost", + port: int = 8000, + ssl: bool = False, + headers: Optional[Dict[str, str]] = None, + settings: Optional["Settings"] = None, + tenant: str = "default_tenant", + database: str = "default_database", + **kwargs: Any, + ) -> None: + """ + Initialize the async vector database. + + Args: + client: chromadb.AsyncClientAPI | The client object of the vector database. Default is None. + If provided, it will use the client object directly and ignore other arguments. + embedding_function: Callable | The embedding function used to generate the vector representation + of the documents. Default is None. Must be provided for async client. + host: str | The host of the HTTP server. Default is 'localhost'. + port: int | The port of the HTTP server. Default is 8000. + ssl: bool | Whether to use SSL to connect to the Chroma server. Defaults to False. + headers: Optional[Dict[str, str]] | A dictionary of headers to send to the Chroma server. Defaults to None. + settings: Optional[Settings] | A dictionary of settings to communicate with the chroma server. + tenant: str | The tenant to use for this client. Defaults to "default_tenant". + database: str | The database to use for this client. Defaults to "default_database". + kwargs: dict | Additional keyword arguments. + + Returns: + None + """ + try: + import chromadb + + if chromadb.__version__ < "0.5.0": + raise ImportError("Please upgrade chromadb to version 0.5.0 or later.") + from chromadb.errors import ChromaError + + AsyncChromaVectorDB.ChromaError = ChromaError # Set the class attribute + except ImportError as e: + raise RuntimeError( + "Missing dependencies for AsyncChromaVectorDB. Please ensure the autogen-ext package was installed with the 'chromadb' extra." 
+            ) from e
+
+        self.client: "AsyncClientAPI" = client
+        self.embedding_function = embedding_function
+        if self.embedding_function is None:
+            raise ValueError("An embedding function must be provided for AsyncChromaVectorDB.")
+        self.type = "chroma"
+        if not self.client:
+            self.client = chromadb.AsyncHttpClient(
+                host=host,
+                port=port,
+                ssl=ssl,
+                headers=headers,
+                settings=settings,
+                tenant=tenant,
+                database=database,
+                **kwargs,
+            )
+        self.active_collection: Optional["Collection"] = None
+
+    async def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> Any:
+        """
+        Create a collection in the vector database.
+        Case 1. if the collection does not exist, create the collection.
+        Case 2. the collection exists, if overwrite is True, it will overwrite the collection.
+        Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection,
+            otherwise it raises a ValueError.
+
+        Args:
+            collection_name: str | The name of the collection.
+            overwrite: bool | Whether to overwrite the collection if it exists. Default is False.
+            get_or_create: bool | Whether to get the collection if it exists. Default is True.
+
+        Returns:
+            Any | The collection object.
+
+        Raises:
+            ValueError: If the collection exists and both overwrite and get_or_create are False.
+        """
+        try:
+            if self.active_collection and self.active_collection.name == collection_name:
+                collection = self.active_collection
+            else:
+                collection = await self.client.get_collection(
+                    name=collection_name, embedding_function=self.embedding_function
+                )
+        except (ValueError, AsyncChromaVectorDB.ChromaError):
+            # A failed lookup is treated as "collection does not exist yet".
+            collection = None
+        if collection is None:
+            return await self.client.create_collection(
+                name=collection_name,
+                embedding_function=self.embedding_function,
+                metadata={},
+            )
+        elif overwrite:
+            await self.client.delete_collection(name=collection_name)
+            # The cached handle now refers to the collection that was just deleted; drop it so the
+            # next get_collection() call fetches the replacement instead of returning a dead handle.
+            if self.active_collection and self.active_collection.name == collection_name:
+                self.active_collection = None
+            return await self.client.create_collection(
+                name=collection_name,
+                embedding_function=self.embedding_function,
+                metadata={},
+            )
+        elif get_or_create:
+            return collection
+        else:
+            raise ValueError(f"Collection {collection_name} already exists.")
+
+    async def get_collection(self, collection_name: Optional[str] = None) -> Any:
+        """
+        Get the collection from the vector database.
+
+        A collection fetched by name becomes the active collection and is reused by later calls
+        that pass the same name or no name at all.
+
+        Args:
+            collection_name: Optional[str] | The name of the collection. Default is None.
+                If None, return the current active collection.
+
+        Returns:
+            Any | The collection object.
+
+        Raises:
+            ValueError: If collection_name is None and there is no active collection.
+        """
+        if collection_name is None:
+            if self.active_collection is None:
+                raise ValueError("No collection is specified.")
+            else:
+                logger.info(
+                    f"No collection is specified. Using current active collection {self.active_collection.name}."
+                )
+        else:
+            # Only hit the server when the requested name differs from the cached handle.
+            if not (self.active_collection and self.active_collection.name == collection_name):
+                self.active_collection = await self.client.get_collection(
+                    name=collection_name, embedding_function=self.embedding_function
+                )
+        return self.active_collection
+
+    async def delete_collection(self, collection_name: str) -> Any:
+        """
+        Delete the collection from the vector database.
+
+        The cached active collection is cleared when it is the one being deleted.
+
+        Args:
+            collection_name: str | The name of the collection.
+
+        Returns:
+            Any
+        """
+        await self.client.delete_collection(name=collection_name)
+        if self.active_collection and self.active_collection.name == collection_name:
+            self.active_collection = None
+
+    async def _batch_insert(
+        self,
+        collection: Any,
+        embeddings: Optional[List[Any]] = None,
+        ids: Optional[List[str]] = None,
+        metadatas: Optional[List[Dict[str, Any]]] = None,
+        documents: Optional[List[str]] = None,
+        upsert: bool = False,
+    ) -> None:
+        """
+        Add or upsert records into the collection in slices of CHROMADB_MAX_BATCH_SIZE.
+
+        The number of batches is driven by ``ids``; the other lists are sliced with the same
+        bounds, and a missing or empty list is forwarded as None. Nothing is sent when ``ids``
+        is None or empty.
+        """
+        batch_size = CHROMADB_MAX_BATCH_SIZE
+        for i in range(0, len(ids or []), batch_size):
+            end_idx = i + batch_size
+            collection_kwargs = {
+                "documents": documents[i:end_idx] if documents else None,
+                "ids": ids[i:end_idx] if ids else None,
+                "metadatas": metadatas[i:end_idx] if metadatas else None,
+                "embeddings": embeddings[i:end_idx] if embeddings else None,
+            }
+            if upsert:
+                await collection.upsert(**collection_kwargs)
+            else:
+                await collection.add(**collection_kwargs)
+
+    async def insert_docs(
+        self,
+        docs: List[Document],
+        collection_name: Optional[str] = None,
+        upsert: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Insert documents into the collection of the vector database.
+
+        Only the first document is inspected to decide which of content, embedding and metadata
+        are sent, so all documents in one call are expected to populate the same fields.
+
+        Args:
+            docs: List[Document] | A list of documents. Each document is a Pydantic Document model.
+            collection_name: Optional[str] | The name of the collection. Default is None.
+            upsert: bool | Whether to update the document if it exists. Default is False.
+            kwargs: Dict[str, Any] | Additional keyword arguments.
+
+        Returns:
+            None
+
+        Raises:
+            ValueError: If the first document has neither content nor embedding, or has no id.
+        """
+        if not docs:
+            return
+        if docs[0].content is None and docs[0].embedding is None:
+            raise ValueError("Either document content or embedding is required.")
+        if docs[0].id is None:
+            raise ValueError("The document id is required.")
+        documents = [doc.content for doc in docs] if docs[0].content else None
+        # Ids are always stored as strings; the lookup and delete paths convert the same way.
+        ids = [str(doc.id) for doc in docs]
+        collection = await self.get_collection(collection_name)
+        embeddings = [doc.embedding for doc in docs] if docs[0].embedding else None
+        if not embeddings and not documents:
+            raise ValueError("Either documents or embeddings must be provided.")
+        metadatas = [doc.metadata for doc in docs] if docs[0].metadata else None
+        await self._batch_insert(
+            collection,
+            embeddings=embeddings,
+            ids=ids,
+            metadatas=metadatas,
+            documents=documents,
+            upsert=upsert,
+        )
+
+    async def update_docs(self, docs: List[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None:
+        """
+        Update documents in the collection of the vector database.
+
+        Implemented as insert_docs with upsert=True.
+
+        Args:
+            docs: List[Document] | A list of documents.
+            collection_name: Optional[str] | The name of the collection. Default is None.
+            kwargs: Dict[str, Any] | Additional keyword arguments.
+
+        Returns:
+            None
+        """
+        await self.insert_docs(docs, collection_name=collection_name, upsert=True, **kwargs)
+
+    async def delete_docs(self, ids: List[ItemID], collection_name: Optional[str] = None, **kwargs: Any) -> None:
+        """
+        Delete documents from the collection of the vector database.
+
+        Args:
+            ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`.
+            collection_name: Optional[str] | The name of the collection. Default is None.
+            kwargs: Dict[str, Any] | Additional keyword arguments.
+
+        Returns:
+            None
+        """
+        collection = await self.get_collection(collection_name)
+        # insert_docs stores ids as str(doc.id); convert here too so non-string ItemIDs still match.
+        await collection.delete(ids=[str(item_id) for item_id in ids])
+
+    async def retrieve_docs(
+        self,
+        queries: List[str],
+        collection_name: Optional[str] = None,
+        n_results: int = 10,
+        distance_threshold: float = -1,
+        **kwargs: Any,
+    ) -> QueryResults:
+        """
+        Retrieve documents from the collection of the vector database based on the queries.
+
+        Args:
+            queries: List[str] | A list of queries. Each query is a string.
+            collection_name: Optional[str] | The name of the collection. Default is None.
+            n_results: int | The number of relevant documents to return. Default is 10.
+            distance_threshold: float | The threshold for the distance score, only distance smaller than it will be
+                returned. Don't filter with it if < 0. Default is -1.
+            kwargs: Dict[str, Any] | Additional keyword arguments.
+
+        Returns:
+            QueryResults | The query results. Each query result is a list of list of tuples containing the document and
+            the distance.
+        """
+        collection = await self.get_collection(collection_name)
+        # Tolerate a single query passed as a bare string.
+        if isinstance(queries, str):
+            queries = [queries]
+        results = await collection.query(
+            query_texts=queries,
+            n_results=n_results,
+        )
+        # Rename the key so that stripping the trailing "s" yields "content" rather than "document".
+        results["contents"] = results.pop("documents")
+        results = chroma_results_to_query_results(results)
+        results = filter_results_by_distance(results, distance_threshold)
+        return results
+
+    @staticmethod
+    def _chroma_get_results_to_list_documents(data_dict: Dict[str, Any]) -> List[Document]:
+        """Converts a dictionary with list values to a list of Document.
+
+        Each key has its trailing character (the plural "s") removed to form the Document field
+        name. The number of documents is taken from the first non-None value.
+
+        Args:
+            data_dict: A dictionary where keys map to lists or None.
+
+        Returns:
+            List[Document] | The list of Document. Empty when every value is None or the
+            dictionary is empty.
+        """
+        results = []
+        keys = [key for key in data_dict if data_dict[key] is not None]
+        if not keys:
+            # Nothing to convert; indexing keys[0] below would raise IndexError.
+            return results
+
+        for i in range(len(data_dict[keys[0]])):
+            doc_dict = {}
+            for key in keys:
+                if len(data_dict[key]) > i:
+                    doc_dict[key[:-1]] = data_dict[key][i]
+            results.append(Document(**doc_dict))  # type: ignore
+        return results
+
+    async def get_docs_by_ids(
+        self,
+        ids: Optional[List[ItemID]] = None,
+        collection_name: Optional[str] = None,
+        include: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """
+        Retrieve documents from the collection of the vector database based on the ids.
+
+        Args:
+            ids: Optional[List[ItemID]] | A list of document ids. If None, will return all the documents. Default is None.
+            collection_name: Optional[str] | The name of the collection. Default is None.
+            include: Optional[List[str]] | The fields to include. Default is None.
+                If None, will include ["metadatas", "documents"]. IDs are always included.
+            kwargs: Dict[str, Any] | Additional keyword arguments.
+
+        Returns:
+            List[Document] | The results.
+        """
+        collection = await self.get_collection(collection_name)
+        if include is None:
+            include = ["metadatas", "documents"]
+        # insert_docs stores ids as str(doc.id); convert here too. None still means "all documents".
+        str_ids = None if ids is None else [str(item_id) for item_id in ids]
+        results = await collection.get(ids=str_ids, include=include)
+        results = self._chroma_get_results_to_list_documents(results)
+        return results
diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_factory.py b/python/packages/autogen-ext/src/autogen_ext/storage/_factory.py
new file mode 100644
index 000000000000..ba9f2d047494
--- /dev/null
+++ b/python/packages/autogen-ext/src/autogen_ext/storage/_factory.py
@@ -0,0 +1,33 @@
+from typing import Literal
+
+from ._base import VectorDB
+
+
+class VectorDBFactory:
+    """
+    Factory class for creating vector databases.
+    """
+
+    PREDEFINED_VECTOR_DB = ["chromadb"]
+
+    @staticmethod
+    def create_vector_db(db_type: Literal["chromadb"], **kwargs) -> VectorDB:
+        """
+        Create a vector database.
+
+        Args:
+            db_type: Literal["chromadb"] | The type of the vector database (matched case-insensitively).
+            kwargs: Dict | The keyword arguments for initializing the vector database.
+
+        Returns:
+            VectorDB | The vector database.
+        """
+        if db_type.lower() == "chromadb":
+            from ._chromadb import ChromaVectorDB
+
+            return ChromaVectorDB(**kwargs)
+
+        else:
+            raise ValueError(
+                f"Unsupported vector database type: {db_type}. Valid types are {VectorDBFactory.PREDEFINED_VECTOR_DB}."
+            )
diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_utils.py b/python/packages/autogen-ext/src/autogen_ext/storage/_utils.py
new file mode 100644
index 000000000000..fe4c08e9edf6
--- /dev/null
+++ b/python/packages/autogen-ext/src/autogen_ext/storage/_utils.py
@@ -0,0 +1,93 @@
+from typing import Any, Dict, List, Optional, Tuple
+
+from ._base import QueryResults
+
+
+def filter_results_by_distance(results: QueryResults, distance_threshold: float = -1) -> QueryResults:
+    """Filters results based on a distance threshold.
+
+    Args:
+        results: QueryResults | The query results. List[List[Tuple[Document, float]]]
+        distance_threshold: The maximum distance allowed for results.
+
+    Returns:
+        QueryResults | A filtered results containing only distances smaller than the threshold.
+    """
+
+    if distance_threshold > 0:
+        results = [[(key, value) for key, value in data if value < distance_threshold] for data in results]
+
+    return results
+
+
+def chroma_results_to_query_results(
+    data_dict: Dict[str, Optional[List[List[Any]]]], special_key: str = "distances"
+) -> List[List[Tuple[Dict[str, Any], float]]]:
+    """Converts a dictionary with list-of-list values to a list of tuples.
+
+    Args:
+        data_dict: A dictionary where keys map to lists of lists or None.
+        special_key: str | The key in the dictionary containing the special values
+            for each tuple.
+
+    Returns:
+        List[List[Tuple[Dict[str, Any], float]]] | A list of tuples, where each tuple contains
+            a sub-dictionary with some keys from the original dictionary and the value from the
+            special_key.
+
+    Example:
+        data_dict = {
+            "key1s": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
+            "key2s": [["a", "b", "c"], ["c", "d", "e"], ["e", "f", "g"]],
+            "key3s": None,
+            "key4s": [["x", "y", "z"], ["1", "2", "3"], ["4", "5", "6"]],
+            "distances": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]],
+        }
+
+        results = [
+            [
+                ({"key1": 1, "key2": "a", "key4": "x"}, 0.1),
+                ({"key1": 2, "key2": "b", "key4": "y"}, 0.2),
+                ({"key1": 3, "key2": "c", "key4": "z"}, 0.3),
+            ],
+            [
+                ({"key1": 4, "key2": "c", "key4": "1"}, 0.4),
+                ({"key1": 5, "key2": "d", "key4": "2"}, 0.5),
+                ({"key1": 6, "key2": "e", "key4": "3"}, 0.6),
+            ],
+            [
+                ({"key1": 7, "key2": "e", "key4": "4"}, 0.7),
+                ({"key1": 8, "key2": "f", "key4": "5"}, 0.8),
+                ({"key1": 9, "key2": "g", "key4": "6"}, 0.9),
+            ],
+        ]
+    """
+
+    if not data_dict or special_key not in data_dict or not data_dict[special_key]:
+        return []
+
+    keys: List[str] = [
+        key
+        for key in data_dict
+        if key != special_key
+        and data_dict[key] is not None
+        and isinstance(data_dict[key], list)
+        and len(data_dict[key]) > 0
+        and isinstance(data_dict[key][0], list)
+    ]
+    result: List[List[Tuple[Dict[str, Any], float]]] = []
+    data_special_key = data_dict[special_key]
+
+    assert data_special_key is not None
+
+    for i in range(len(data_special_key)):
+        sub_result: List[Tuple[Dict[str, Any], float]] = []
+        for j, distance in enumerate(data_special_key[i]):
+            sub_dict: Dict[str, Any] = {}
+            for key in keys:
+                if len(data_dict[key]) > i and len(data_dict[key][i]) > j:
+                    sub_dict[key[:-1]] = data_dict[key][i][j]  # remove 's' at the end from key
+            sub_result.append((sub_dict, distance))
+        result.append(sub_result)
+
+    return result
diff --git a/python/packages/autogen-ext/tests/storage/test_chroma_db.py
b/python/packages/autogen-ext/tests/storage/test_chroma_db.py
new file mode 100644
index 000000000000..5be4e06a014f
--- /dev/null
+++ b/python/packages/autogen-ext/tests/storage/test_chroma_db.py
@@ -0,0 +1,80 @@
+from autogen_ext.storage._base import Document
+import pytest
+from autogen_ext.storage import ChromaVectorDB
+from chromadb.errors import ChromaError
+
+
+# TODO: restore a skip guard for missing dependencies: @pytest.mark.skipif(skip, reason="dependency is not installed")
+def test_chromadb():
+    # create_collection: overwrite=True so the test starts from a fresh collection
+    db = ChromaVectorDB(path=".db")
+    collection_name = "test_collection"
+    collection = db.create_collection(collection_name, overwrite=True, get_or_create=True)
+    assert collection.name == collection_name
+
+    # delete_collection: fetching a deleted collection must raise
+    db.delete_collection(collection_name)
+    pytest.raises((ValueError, ChromaError), db.get_collection, collection_name)
+
+    # create_collection: remaining overwrite / get_or_create combinations, including the "already exists" error
+    collection = db.create_collection(collection_name, overwrite=False, get_or_create=False)
+    assert collection.name == collection_name
+    pytest.raises(
+        (ValueError, ChromaError), db.create_collection, collection_name, overwrite=False, get_or_create=False
+    )
+    collection = db.create_collection(collection_name, overwrite=True, get_or_create=False)
+    assert collection.name == collection_name
+    collection = db.create_collection(collection_name, overwrite=False, get_or_create=True)
+    assert collection.name == collection_name
+
+    # get_collection: fetch the existing collection by name
+    collection = db.get_collection(collection_name)
+    assert collection.name == collection_name
+
+    # insert_docs: plain insert (upsert=False), checked through the raw collection handle
+    docs = [Document(content="doc1", id="1"), Document(content="doc2", id="2"), Document(content="doc3", id="3")]
+    db.insert_docs(docs, collection_name, upsert=False)
+    res = db.get_collection(collection_name).get(["1", "2"])
+    assert res["documents"] == ["doc1", "doc2"]
+
+    # update_docs: id "1" gets new content, ids "2" and "3" are re-sent unchanged
+    docs = [Document(content="doc11", id="1"), Document(content="doc2", id="2"), Document(content="doc3", id="3")]
+    db.update_docs(docs, collection_name)
+    res = db.get_collection(collection_name).get(["1", "2"])
+    assert res["documents"] == ["doc11", "doc2"]
+
+    # delete_docs: a deleted id is no longer returned
+    ids = ["1"]
+    collection_name = "test_collection"
+    db.delete_docs(ids, collection_name)
+    res = db.get_collection(collection_name).get(ids)
+    assert res["documents"] == []
+
+    # retrieve_docs: one ranked id list per query, then the same queries with a distance threshold of 0.1
+    queries = ["doc2", "doc3"]
+    collection_name = "test_collection"
+    res = db.retrieve_docs(queries, collection_name)
+    assert [[r[0].id for r in rr] for rr in res] == [["2", "3"], ["3", "2"]]
+    res = db.retrieve_docs(queries, collection_name, distance_threshold=0.1)
+    assert [[r[0].id for r in rr] for rr in res] == [["2"], ["3"]]
+
+    # get_docs_by_ids: explicit ids first, then no ids (returns every document)
+    res = db.get_docs_by_ids(["1", "2"], collection_name)
+    assert [r.id for r in res] == ["2"]  # "1" has been deleted
+    res = db.get_docs_by_ids(collection_name=collection_name)
+    assert [r.id for r in res] == ["2", "3"]
+
+    # _chroma_get_results_to_list_documents: None-valued keys are dropped and the trailing "s" is stripped from keys
+    data_dict = {
+        "key1s": [1, 2, 3],
+        "key2s": ["a", "b", "c"],
+        "key3s": None,
+        "key4s": ["x", "y", "z"],
+    }
+
+    results = [
+        {"key1": 1, "key2": "a", "key4": "x"},
+        {"key1": 2, "key2": "b", "key4": "y"},
+        {"key1": 3, "key2": "c", "key4": "z"},
+    ]
+    assert db._chroma_get_results_to_list_documents(data_dict) == results  # type: ignore
diff --git a/python/uv.lock b/python/uv.lock
index ed20c3784d79..02bba38dfa1a 100644
--- a/python/uv.lock
+++ b/python/uv.lock
@@ -16,12 +16,18 @@ resolution-markers = [
     "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version < '3.11' and platform_system != 'Darwin' and platform_system != 'Linux')",
     "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version == '3.11.*' and platform_system != 'Darwin' and platform_system != 'Linux')",
     "(python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine != 'aarch64' and platform_system != 'Darwin')
or (python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_system != 'Darwin' and platform_system != 'Linux')", - "python_full_version < '3.13' and platform_system == 'Darwin'", - "python_full_version >= '3.13' and platform_system == 'Darwin'", - "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_system == 'Linux'", - "python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_system == 'Linux'", - "(python_full_version < '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version < '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')", - "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')", + "python_full_version < '3.11' and platform_system == 'Darwin'", + "python_full_version == '3.11.*' and platform_system == 'Darwin'", + "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_system == 'Darwin'", + "python_full_version >= '3.12.4' and platform_system == 'Darwin'", + "python_full_version < '3.11' and platform_machine == 'aarch64' and platform_system == 'Linux'", + "python_full_version == '3.11.*' and platform_machine == 'aarch64' and platform_system == 'Linux'", + "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine == 'aarch64' and platform_system == 'Linux'", + "python_full_version >= '3.12.4' and platform_machine == 'aarch64' and platform_system == 'Linux'", + "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version < '3.11' and platform_system != 'Darwin' and platform_system != 'Linux')", + "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version == '3.11.*' and platform_system != 'Darwin' and 
platform_system != 'Linux')", + "(python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_system != 'Darwin' and platform_system != 'Linux')", + "(python_full_version >= '3.12.4' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.12.4' and platform_system != 'Darwin' and platform_system != 'Linux')", ] [manifest] @@ -309,6 +315,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/7b/7bf42178d227b26d3daf94cdd22a72a4ed5bf235548c4f5aea49c51c6458/arxiv-2.1.3-py3-none-any.whl", hash = "sha256:6f43673ab770a9e848d7d4fc1894824df55edeac3c3572ea280c9ba2e3c0f39f", size = 11478 }, ] +[[package]] +name = "asgiref" +version = "3.8.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/29/38/b3395cc9ad1b56d2ddac9970bc8f4141312dbaec28bc7c218b0dfafd0f42/asgiref-3.8.1.tar.gz", hash = "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590", size = 35186 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/e3/893e8757be2612e6c266d9bb58ad2e3651524b5b40cf56761e985a28b13e/asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47", size = 23828 }, +] + [[package]] name = "asttokens" version = "2.4.1" @@ -439,7 +457,7 @@ requires-dist = [ { name = "opentelemetry-api", specifier = "~=1.27.0" }, { name = "pillow" }, { name = "protobuf", specifier = "~=4.25.1" }, - { name = "pydantic", specifier = "<3.0.0,>=2.0.0" }, + { name = "pydantic", specifier = ">=2.0.0,<3.0.0" }, { name = "tiktoken" }, { name = "typing-extensions" }, ] @@ -487,7 +505,7 @@ dev = [ [[package]] name = "autogen-ext" -version = "0.4.0.dev2" +version = "0.4.0.dev1" source 
= { editable = "packages/autogen-ext" } dependencies = [ { name = "autogen-core" }, @@ -501,6 +519,10 @@ azure = [ azure-code-executor = [ { name = "azure-core" }, ] +chromadb = [ + { name = "chromadb" }, + { name = "sentence-transformers" }, +] docker = [ { name = "docker" }, ] @@ -523,11 +545,13 @@ requires-dist = [ { name = "azure-core", marker = "extra == 'azure'" }, { name = "azure-core", marker = "extra == 'azure-code-executor'" }, { name = "azure-identity", marker = "extra == 'azure'" }, + { name = "chromadb", marker = "extra == 'chromadb'", specifier = "~=0.5.15" }, { name = "docker", marker = "extra == 'docker'", specifier = "~=7.0" }, { name = "docker", marker = "extra == 'docker-code-executor'", specifier = "~=7.0" }, { name = "langchain-core", marker = "extra == 'langchain'", specifier = "~=0.3.3" }, { name = "langchain-core", marker = "extra == 'langchain-tools'", specifier = "~=0.3.3" }, { name = "openai", marker = "extra == 'openai'", specifier = ">=1.3" }, + { name = "sentence-transformers", marker = "extra == 'chromadb'" }, ] [[package]] @@ -581,7 +605,7 @@ requires-dist = [ { name = "pdfminer-six" }, { name = "playwright" }, { name = "puremagic" }, - { name = "pydantic", specifier = "<3.0.0,>=2.0.0" }, + { name = "pydantic", specifier = ">=2.0.0,<3.0.0" }, { name = "pydub" }, { name = "python-pptx" }, { name = "requests" }, @@ -700,6 +724,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599 }, ] +[[package]] +name = "backoff" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001 } +wheels 
= [ + { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148 }, +] + +[[package]] +name = "bcrypt" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/7e/d95e7d96d4828e965891af92e43b52a4cd3395dc1c1ef4ee62748d0471d0/bcrypt-4.2.0.tar.gz", hash = "sha256:cf69eaf5185fd58f268f805b505ce31f9b9fc2d64b376642164e9244540c1221", size = 24294 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/81/4e8f5bc0cd947e91fb720e1737371922854da47a94bc9630454e7b2845f8/bcrypt-4.2.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:096a15d26ed6ce37a14c1ac1e48119660f21b24cba457f160a4b830f3fe6b5cb", size = 471568 }, + { url = "https://files.pythonhosted.org/packages/05/d2/1be1e16aedec04bcf8d0156e01b987d16a2063d38e64c3f28030a3427d61/bcrypt-4.2.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c02d944ca89d9b1922ceb8a46460dd17df1ba37ab66feac4870f6862a1533c00", size = 277372 }, + { url = "https://files.pythonhosted.org/packages/e3/96/7a654027638ad9b7589effb6db77eb63eba64319dfeaf9c0f4ca953e5f76/bcrypt-4.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d84cf6d877918620b687b8fd1bf7781d11e8a0998f576c7aa939776b512b98d", size = 273488 }, + { url = "https://files.pythonhosted.org/packages/46/54/dc7b58abeb4a3d95bab653405935e27ba32f21b812d8ff38f271fb6f7f55/bcrypt-4.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1bb429fedbe0249465cdd85a58e8376f31bb315e484f16e68ca4c786dcc04291", size = 277759 }, + { url = "https://files.pythonhosted.org/packages/ac/be/da233c5f11fce3f8adec05e8e532b299b64833cc962f49331cdd0e614fa9/bcrypt-4.2.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:655ea221910bcac76ea08aaa76df427ef8625f92e55a8ee44fbf7753dbabb328", size = 273796 
}, + { url = "https://files.pythonhosted.org/packages/b0/b8/8b4add88d55a263cf1c6b8cf66c735280954a04223fcd2880120cc767ac3/bcrypt-4.2.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:1ee38e858bf5d0287c39b7a1fc59eec64bbf880c7d504d3a06a96c16e14058e7", size = 311082 }, + { url = "https://files.pythonhosted.org/packages/7b/76/2aa660679abbdc7f8ee961552e4bb6415a81b303e55e9374533f22770203/bcrypt-4.2.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:0da52759f7f30e83f1e30a888d9163a81353ef224d82dc58eb5bb52efcabc399", size = 305912 }, + { url = "https://files.pythonhosted.org/packages/00/03/2af7c45034aba6002d4f2b728c1a385676b4eab7d764410e34fd768009f2/bcrypt-4.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3698393a1b1f1fd5714524193849d0c6d524d33523acca37cd28f02899285060", size = 325185 }, + { url = "https://files.pythonhosted.org/packages/dc/5d/6843443ce4ab3af40bddb6c7c085ed4a8418b3396f7a17e60e6d9888416c/bcrypt-4.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:762a2c5fb35f89606a9fde5e51392dad0cd1ab7ae64149a8b935fe8d79dd5ed7", size = 335188 }, + { url = "https://files.pythonhosted.org/packages/cb/4c/ff8ca83d816052fba36def1d24e97d9a85739b9bbf428c0d0ecd296a07c8/bcrypt-4.2.0-cp37-abi3-win32.whl", hash = "sha256:5a1e8aa9b28ae28020a3ac4b053117fb51c57a010b9f969603ed885f23841458", size = 156481 }, + { url = "https://files.pythonhosted.org/packages/65/f1/e09626c88a56cda488810fb29d5035f1662873777ed337880856b9d204ae/bcrypt-4.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:8f6ede91359e5df88d1f5c1ef47428a4420136f3ce97763e31b86dd8280fbdf5", size = 151336 }, + { url = "https://files.pythonhosted.org/packages/96/86/8c6a84daed4dd878fbab094400c9174c43d9b838ace077a2f8ee8bc3ae12/bcrypt-4.2.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:c52aac18ea1f4a4f65963ea4f9530c306b56ccd0c6f8c8da0c06976e34a6e841", size = 472414 }, + { url = 
"https://files.pythonhosted.org/packages/f6/05/e394515f4e23c17662e5aeb4d1859b11dc651be01a3bd03c2e919a155901/bcrypt-4.2.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bbbfb2734f0e4f37c5136130405332640a1e46e6b23e000eeff2ba8d005da68", size = 277599 }, + { url = "https://files.pythonhosted.org/packages/4b/3b/ad784eac415937c53da48983756105d267b91e56aa53ba8a1b2014b8d930/bcrypt-4.2.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3413bd60460f76097ee2e0a493ccebe4a7601918219c02f503984f0a7ee0aebe", size = 273491 }, + { url = "https://files.pythonhosted.org/packages/cc/14/b9ff8e0218bee95e517b70e91130effb4511e8827ac1ab00b4e30943a3f6/bcrypt-4.2.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8d7bb9c42801035e61c109c345a28ed7e84426ae4865511eb82e913df18f58c2", size = 277934 }, + { url = "https://files.pythonhosted.org/packages/3e/d0/31938bb697600a04864246acde4918c4190a938f891fd11883eaaf41327a/bcrypt-4.2.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3d3a6d28cb2305b43feac298774b997e372e56c7c7afd90a12b3dc49b189151c", size = 273804 }, + { url = "https://files.pythonhosted.org/packages/e7/c3/dae866739989e3f04ae304e1201932571708cb292a28b2f1b93283e2dcd8/bcrypt-4.2.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:9c1c4ad86351339c5f320ca372dfba6cb6beb25e8efc659bedd918d921956bae", size = 311275 }, + { url = "https://files.pythonhosted.org/packages/5d/2c/019bc2c63c6125ddf0483ee7d914a405860327767d437913942b476e9c9b/bcrypt-4.2.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:27fe0f57bb5573104b5a6de5e4153c60814c711b29364c10a75a54bb6d7ff48d", size = 306355 }, + { url = "https://files.pythonhosted.org/packages/75/fe/9e137727f122bbe29771d56afbf4e0dbc85968caa8957806f86404a5bfe1/bcrypt-4.2.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8ac68872c82f1add6a20bd489870c71b00ebacd2e9134a8aa3f98a0052ab4b0e", size = 325381 }, + { url = 
"https://files.pythonhosted.org/packages/1a/d4/586b9c18a327561ea4cd336ff4586cca1a7aa0f5ee04e23a8a8bb9ca64f1/bcrypt-4.2.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:cb2a8ec2bc07d3553ccebf0746bbf3d19426d1c6d1adbd4fa48925f66af7b9e8", size = 335685 }, + { url = "https://files.pythonhosted.org/packages/24/55/1a7127faf4576138bb278b91e9c75307490178979d69c8e6e273f74b974f/bcrypt-4.2.0-cp39-abi3-win32.whl", hash = "sha256:77800b7147c9dc905db1cba26abe31e504d8247ac73580b4aa179f98e6608f34", size = 155857 }, + { url = "https://files.pythonhosted.org/packages/1c/2a/c74052e54162ec639266d91539cca7cbf3d1d3b8b36afbfeaee0ea6a1702/bcrypt-4.2.0-cp39-abi3-win_amd64.whl", hash = "sha256:61ed14326ee023917ecd093ee6ef422a72f3aec6f07e21ea5f10622b735538a9", size = 151717 }, + { url = "https://files.pythonhosted.org/packages/09/97/01026e7b1b7f8aeb41514408eca1137c0f8aef9938335e3bc713f82c282e/bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:39e1d30c7233cfc54f5c3f2c825156fe044efdd3e0b9d309512cc514a263ec2a", size = 275924 }, + { url = "https://files.pythonhosted.org/packages/ca/46/03eb26ea3e9c12ca18d1f3bf06199f7d72ce52e68f2a1ebcfd8acff9c472/bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f4f4acf526fcd1c34e7ce851147deedd4e26e6402369304220250598b26448db", size = 272242 }, +] + [[package]] name = "beautifulsoup4" version = "4.12.3" @@ -724,6 +789,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/24/7e/f7b6f453e6481d1e233540262ccbfcf89adcd43606f44a028d7f5fae5eb2/binaryornot-0.4.4-py2.py3-none-any.whl", hash = "sha256:b8b71173c917bddcd2c16070412e369c3ed7f0528926f70cac18a6c97fd563e4", size = 9006 }, ] +[[package]] +name = "build" +version = "1.2.2.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "os_name == 'nt'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10.2'" }, + { name = "packaging" }, + { name = "pyproject-hooks" }, + { name = "tomli", 
marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/46/aeab111f8e06793e4f0e421fcad593d547fb8313b50990f31681ee2fb1ad/build-1.2.2.post1.tar.gz", hash = "sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7", size = 46701 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/c2/80633736cd183ee4a62107413def345f7e6e3c01563dbca1417363cf957e/build-1.2.2.post1-py3-none-any.whl", hash = "sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5", size = 22950 }, +] + [[package]] name = "cachetools" version = "5.5.0" @@ -871,6 +952,70 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d6/d8/15cfcb738d2518daf04d34b23419bd359cbd8e09da50778ebac521774fc8/chess-1.10.0-py3-none-any.whl", hash = "sha256:48ff7c084a370811819cfc753c2ee159942356ada70824666bd01ee3fca170d0", size = 154405 }, ] +[[package]] +name = "chroma-hnswlib" +version = "0.7.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/09/10d57569e399ce9cbc5eee2134996581c957f63a9addfa6ca657daf006b8/chroma_hnswlib-0.7.6.tar.gz", hash = "sha256:4dce282543039681160259d29fcde6151cc9106c6461e0485f57cdccd83059b7", size = 32256 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/74/b9dde05ea8685d2f8c4681b517e61c7887e974f6272bb24ebc8f2105875b/chroma_hnswlib-0.7.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f35192fbbeadc8c0633f0a69c3d3e9f1a4eab3a46b65458bbcbcabdd9e895c36", size = 195821 }, + { url = "https://files.pythonhosted.org/packages/fd/58/101bfa6bc41bc6cc55fbb5103c75462a7bf882e1704256eb4934df85b6a8/chroma_hnswlib-0.7.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f007b608c96362b8f0c8b6b2ac94f67f83fcbabd857c378ae82007ec92f4d82", size = 183854 }, + { url = 
"https://files.pythonhosted.org/packages/17/ff/95d49bb5ce134f10d6aa08d5f3bec624eaff945f0b17d8c3fce888b9a54a/chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:456fd88fa0d14e6b385358515aef69fc89b3c2191706fd9aee62087b62aad09c", size = 2358774 }, + { url = "https://files.pythonhosted.org/packages/3a/6d/27826180a54df80dbba8a4f338b022ba21c0c8af96fd08ff8510626dee8f/chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dfaae825499c2beaa3b75a12d7ec713b64226df72a5c4097203e3ed532680da", size = 2392739 }, + { url = "https://files.pythonhosted.org/packages/d6/63/ee3e8b7a8f931918755faacf783093b61f32f59042769d9db615999c3de0/chroma_hnswlib-0.7.6-cp310-cp310-win_amd64.whl", hash = "sha256:2487201982241fb1581be26524145092c95902cb09fc2646ccfbc407de3328ec", size = 150955 }, + { url = "https://files.pythonhosted.org/packages/f5/af/d15fdfed2a204c0f9467ad35084fbac894c755820b203e62f5dcba2d41f1/chroma_hnswlib-0.7.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81181d54a2b1e4727369486a631f977ffc53c5533d26e3d366dda243fb0998ca", size = 196911 }, + { url = "https://files.pythonhosted.org/packages/0d/19/aa6f2139f1ff7ad23a690ebf2a511b2594ab359915d7979f76f3213e46c4/chroma_hnswlib-0.7.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4b4ab4e11f1083dd0a11ee4f0e0b183ca9f0f2ed63ededba1935b13ce2b3606f", size = 185000 }, + { url = "https://files.pythonhosted.org/packages/79/b1/1b269c750e985ec7d40b9bbe7d66d0a890e420525187786718e7f6b07913/chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53db45cd9173d95b4b0bdccb4dbff4c54a42b51420599c32267f3abbeb795170", size = 2377289 }, + { url = "https://files.pythonhosted.org/packages/c7/2d/d5663e134436e5933bc63516a20b5edc08b4c1b1588b9680908a5f1afd04/chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c093f07a010b499c00a15bc9376036ee4800d335360570b14f7fe92badcdcf9", size = 
2411755 }, + { url = "https://files.pythonhosted.org/packages/3e/79/1bce519cf186112d6d5ce2985392a89528c6e1e9332d680bf752694a4cdf/chroma_hnswlib-0.7.6-cp311-cp311-win_amd64.whl", hash = "sha256:0540b0ac96e47d0aa39e88ea4714358ae05d64bbe6bf33c52f316c664190a6a3", size = 151888 }, + { url = "https://files.pythonhosted.org/packages/93/ac/782b8d72de1c57b64fdf5cb94711540db99a92768d93d973174c62d45eb8/chroma_hnswlib-0.7.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e87e9b616c281bfbe748d01705817c71211613c3b063021f7ed5e47173556cb7", size = 197804 }, + { url = "https://files.pythonhosted.org/packages/32/4e/fd9ce0764228e9a98f6ff46af05e92804090b5557035968c5b4198bc7af9/chroma_hnswlib-0.7.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec5ca25bc7b66d2ecbf14502b5729cde25f70945d22f2aaf523c2d747ea68912", size = 185421 }, + { url = "https://files.pythonhosted.org/packages/d9/3d/b59a8dedebd82545d873235ef2d06f95be244dfece7ee4a1a6044f080b18/chroma_hnswlib-0.7.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:305ae491de9d5f3c51e8bd52d84fdf2545a4a2bc7af49765cda286b7bb30b1d4", size = 2389672 }, + { url = "https://files.pythonhosted.org/packages/74/1e/80a033ea4466338824974a34f418e7b034a7748bf906f56466f5caa434b0/chroma_hnswlib-0.7.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:822ede968d25a2c88823ca078a58f92c9b5c4142e38c7c8b4c48178894a0a3c5", size = 2436986 }, +] + +[[package]] +name = "chromadb" +version = "0.5.15" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "bcrypt" }, + { name = "build" }, + { name = "chroma-hnswlib" }, + { name = "fastapi" }, + { name = "grpcio" }, + { name = "httpx" }, + { name = "importlib-resources" }, + { name = "kubernetes" }, + { name = "mmh3" }, + { name = "numpy" }, + { name = "onnxruntime" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-grpc" }, + { name = "opentelemetry-instrumentation-fastapi" }, + { name = 
"opentelemetry-sdk" }, + { name = "orjson" }, + { name = "overrides" }, + { name = "posthog" }, + { name = "pydantic" }, + { name = "pypika" }, + { name = "pyyaml" }, + { name = "rich" }, + { name = "tenacity" }, + { name = "tokenizers" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, + { name = "uvicorn", extra = ["standard"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/34/ae/1ec964744b2e8d26db386617c63bd18ff6fdacba854b699b2d07cc8811f5/chromadb-0.5.15.tar.gz", hash = "sha256:9314a1904418dafbc4d7ed47d88b8c9d0cf51f5ca6e9377e668367ef3c46ee75", size = 33609544 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/43/7295465181c22b0e84162c6b647859f691c69bde5bd8a8f30b320ccc2e3c/chromadb-0.5.15-py3-none-any.whl", hash = "sha256:df8ccc3a36798e14d6e173261aabcdb88021d8ad7550ab2a6acbd79f5ab5ef4f", size = 607020 }, +] + [[package]] name = "chromedriver-autoinstaller" version = "0.6.4" @@ -913,6 +1058,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "coloredlogs" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "humanfriendly" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018 }, +] + [[package]] name = "comm" version = "0.2.2" @@ -1100,6 +1257,15 @@ wheels = [ 
{ url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, ] +[[package]] +name = "durationpy" +version = "0.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/31/e9/f49c4e7fccb77fa5c43c2480e09a857a78b41e7331a75e128ed5df45c56b/durationpy-0.9.tar.gz", hash = "sha256:fd3feb0a69a0057d582ef643c355c40d2fa1c942191f914d12203b1a01ac722a", size = 3186 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/a3/ac312faeceffd2d8f86bc6dcb5c401188ba5a01bc88e69bed97578a0dfcd/durationpy-0.9-py3-none-any.whl", hash = "sha256:e65359a7af5cedad07fb77a2dd3f390f8eb0b74cb845589fa6c057086834dd38", size = 3461 }, +] + [[package]] name = "et-xmlfile" version = "1.1.0" @@ -1203,6 +1369,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/1e/0f9bc46ad8c93dca2c34ee7814119d9aefb364cff4494995f0e6e44ccc92/FLAML-2.3.1-py3-none-any.whl", hash = "sha256:c633c73bcb4e0ec56d7ccd1db9577c40ccf8247455a51868da2a2b93795bb26a", size = 313347 }, ] +[[package]] +name = "flatbuffers" +version = "24.3.25" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a9/74/2df95ef84b214d2bee0886d572775a6f38793f5ca6d7630c3239c91104ac/flatbuffers-24.3.25.tar.gz", hash = "sha256:de2ec5b203f21441716617f38443e0a8ebf3d25bf0d9c0bb0ce68fa00ad546a4", size = 22139 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/f0/7e988a019bc54b2dbd0ad4182ef2d53488bb02e58694cd79d61369e85900/flatbuffers-24.3.25-py2.py3-none-any.whl", hash = "sha256:8dbdec58f935f3765e4f7f3cf635ac3a77f83568138d6a2311f524ec96364812", size = 26784 }, +] + [[package]] name = "frozenlist" version = "1.4.1" @@ -1680,6 +1855,42 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854 }, ] +[[package]] +name = "httptools" +version = "0.6.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/9a/ce5e1f7e131522e6d3426e8e7a490b3a01f39a6696602e1c4f33f9e94277/httptools-0.6.4.tar.gz", hash = "sha256:4e93eee4add6493b59a5c514da98c939b244fce4a0d8879cd3f466562f4b7d5c", size = 240639 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/6f/972f8eb0ea7d98a1c6be436e2142d51ad2a64ee18e02b0e7ff1f62171ab1/httptools-0.6.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3c73ce323711a6ffb0d247dcd5a550b8babf0f757e86a52558fe5b86d6fefcc0", size = 198780 }, + { url = "https://files.pythonhosted.org/packages/6a/b0/17c672b4bc5c7ba7f201eada4e96c71d0a59fbc185e60e42580093a86f21/httptools-0.6.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:345c288418f0944a6fe67be8e6afa9262b18c7626c3ef3c28adc5eabc06a68da", size = 103297 }, + { url = "https://files.pythonhosted.org/packages/92/5e/b4a826fe91971a0b68e8c2bd4e7db3e7519882f5a8ccdb1194be2b3ab98f/httptools-0.6.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deee0e3343f98ee8047e9f4c5bc7cedbf69f5734454a94c38ee829fb2d5fa3c1", size = 443130 }, + { url = "https://files.pythonhosted.org/packages/b0/51/ce61e531e40289a681a463e1258fa1e05e0be54540e40d91d065a264cd8f/httptools-0.6.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca80b7485c76f768a3bc83ea58373f8db7b015551117375e4918e2aa77ea9b50", size = 442148 }, + { url = "https://files.pythonhosted.org/packages/ea/9e/270b7d767849b0c96f275c695d27ca76c30671f8eb8cc1bab6ced5c5e1d0/httptools-0.6.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:90d96a385fa941283ebd231464045187a31ad932ebfa541be8edf5b3c2328959", size = 415949 }, + { url = "https://files.pythonhosted.org/packages/81/86/ced96e3179c48c6f656354e106934e65c8963d48b69be78f355797f0e1b3/httptools-0.6.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:59e724f8b332319e2875efd360e61ac07f33b492889284a3e05e6d13746876f4", size = 417591 }, + { url = "https://files.pythonhosted.org/packages/75/73/187a3f620ed3175364ddb56847d7a608a6fc42d551e133197098c0143eca/httptools-0.6.4-cp310-cp310-win_amd64.whl", hash = "sha256:c26f313951f6e26147833fc923f78f95604bbec812a43e5ee37f26dc9e5a686c", size = 88344 }, + { url = "https://files.pythonhosted.org/packages/7b/26/bb526d4d14c2774fe07113ca1db7255737ffbb119315839af2065abfdac3/httptools-0.6.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f47f8ed67cc0ff862b84a1189831d1d33c963fb3ce1ee0c65d3b0cbe7b711069", size = 199029 }, + { url = "https://files.pythonhosted.org/packages/a6/17/3e0d3e9b901c732987a45f4f94d4e2c62b89a041d93db89eafb262afd8d5/httptools-0.6.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0614154d5454c21b6410fdf5262b4a3ddb0f53f1e1721cfd59d55f32138c578a", size = 103492 }, + { url = "https://files.pythonhosted.org/packages/b7/24/0fe235d7b69c42423c7698d086d4db96475f9b50b6ad26a718ef27a0bce6/httptools-0.6.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8787367fbdfccae38e35abf7641dafc5310310a5987b689f4c32cc8cc3ee975", size = 462891 }, + { url = "https://files.pythonhosted.org/packages/b1/2f/205d1f2a190b72da6ffb5f41a3736c26d6fa7871101212b15e9b5cd8f61d/httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40b0f7fe4fd38e6a507bdb751db0379df1e99120c65fbdc8ee6c1d044897a636", size = 459788 }, + { url = "https://files.pythonhosted.org/packages/6e/4c/d09ce0eff09057a206a74575ae8f1e1e2f0364d20e2442224f9e6612c8b9/httptools-0.6.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:40a5ec98d3f49904b9fe36827dcf1aadfef3b89e2bd05b0e35e94f97c2b14721", size = 433214 }, + { url = "https://files.pythonhosted.org/packages/3e/d2/84c9e23edbccc4a4c6f96a1b8d99dfd2350289e94f00e9ccc7aadde26fb5/httptools-0.6.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dacdd3d10ea1b4ca9df97a0a303cbacafc04b5cd375fa98732678151643d4988", size = 434120 }, + { url = "https://files.pythonhosted.org/packages/d0/46/4d8e7ba9581416de1c425b8264e2cadd201eb709ec1584c381f3e98f51c1/httptools-0.6.4-cp311-cp311-win_amd64.whl", hash = "sha256:288cd628406cc53f9a541cfaf06041b4c71d751856bab45e3702191f931ccd17", size = 88565 }, + { url = "https://files.pythonhosted.org/packages/bb/0e/d0b71465c66b9185f90a091ab36389a7352985fe857e352801c39d6127c8/httptools-0.6.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:df017d6c780287d5c80601dafa31f17bddb170232d85c066604d8558683711a2", size = 200683 }, + { url = "https://files.pythonhosted.org/packages/e2/b8/412a9bb28d0a8988de3296e01efa0bd62068b33856cdda47fe1b5e890954/httptools-0.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:85071a1e8c2d051b507161f6c3e26155b5c790e4e28d7f236422dbacc2a9cc44", size = 104337 }, + { url = "https://files.pythonhosted.org/packages/9b/01/6fb20be3196ffdc8eeec4e653bc2a275eca7f36634c86302242c4fbb2760/httptools-0.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69422b7f458c5af875922cdb5bd586cc1f1033295aa9ff63ee196a87519ac8e1", size = 508796 }, + { url = "https://files.pythonhosted.org/packages/f7/d8/b644c44acc1368938317d76ac991c9bba1166311880bcc0ac297cb9d6bd7/httptools-0.6.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16e603a3bff50db08cd578d54f07032ca1631450ceb972c2f834c2b860c28ea2", size = 510837 }, + { url = "https://files.pythonhosted.org/packages/52/d8/254d16a31d543073a0e57f1c329ca7378d8924e7e292eda72d0064987486/httptools-0.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:ec4f178901fa1834d4a060320d2f3abc5c9e39766953d038f1458cb885f47e81", size = 485289 }, + { url = "https://files.pythonhosted.org/packages/5f/3c/4aee161b4b7a971660b8be71a92c24d6c64372c1ab3ae7f366b3680df20f/httptools-0.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb89ecf8b290f2e293325c646a211ff1c2493222798bb80a530c5e7502494f", size = 489779 }, + { url = "https://files.pythonhosted.org/packages/12/b7/5cae71a8868e555f3f67a50ee7f673ce36eac970f029c0c5e9d584352961/httptools-0.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:db78cb9ca56b59b016e64b6031eda5653be0589dba2b1b43453f6e8b405a0970", size = 88634 }, + { url = "https://files.pythonhosted.org/packages/94/a3/9fe9ad23fd35f7de6b91eeb60848986058bd8b5a5c1e256f5860a160cc3e/httptools-0.6.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ade273d7e767d5fae13fa637f4d53b6e961fb7fd93c7797562663f0171c26660", size = 197214 }, + { url = "https://files.pythonhosted.org/packages/ea/d9/82d5e68bab783b632023f2fa31db20bebb4e89dfc4d2293945fd68484ee4/httptools-0.6.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:856f4bc0478ae143bad54a4242fccb1f3f86a6e1be5548fecfd4102061b3a083", size = 102431 }, + { url = "https://files.pythonhosted.org/packages/96/c1/cb499655cbdbfb57b577734fde02f6fa0bbc3fe9fb4d87b742b512908dff/httptools-0.6.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:322d20ea9cdd1fa98bd6a74b77e2ec5b818abdc3d36695ab402a0de8ef2865a3", size = 473121 }, + { url = "https://files.pythonhosted.org/packages/af/71/ee32fd358f8a3bb199b03261f10921716990808a675d8160b5383487a317/httptools-0.6.4-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d87b29bd4486c0093fc64dea80231f7c7f7eb4dc70ae394d70a495ab8436071", size = 473805 }, + { url = "https://files.pythonhosted.org/packages/8a/0a/0d4df132bfca1507114198b766f1737d57580c9ad1cf93c1ff673e3387be/httptools-0.6.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:342dd6946aa6bda4b8f18c734576106b8a31f2fe31492881a9a160ec84ff4bd5", size = 448858 }, + { url = "https://files.pythonhosted.org/packages/1e/6a/787004fdef2cabea27bad1073bf6a33f2437b4dbd3b6fb4a9d71172b1c7c/httptools-0.6.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b36913ba52008249223042dca46e69967985fb4051951f94357ea681e1f5dc0", size = 452042 }, + { url = "https://files.pythonhosted.org/packages/4d/dc/7decab5c404d1d2cdc1bb330b1bf70e83d6af0396fd4fc76fc60c0d522bf/httptools-0.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:28908df1b9bb8187393d5b5db91435ccc9c8e891657f9cbb42a2541b44c82fc8", size = 87682 }, +] + [[package]] name = "httpx" version = "0.27.2" @@ -1714,6 +1925,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/8f/d6718641c14d98a5848c6a24d2376028d292074ffade0702940a4b1dde76/huggingface_hub-0.24.6-py3-none-any.whl", hash = "sha256:a990f3232aa985fe749bc9474060cbad75e8b2f115f6665a9fda5b9c97818970", size = 417509 }, ] +[[package]] +name = "humanfriendly" +version = "10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyreadline3", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794 }, +] + [[package]] name = "idna" version = "3.8" @@ -1744,6 +1967,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c0/14/362d31bf1076b21e1bcdcb0dc61944822ff263937b804a79231df2774d28/importlib_metadata-8.4.0-py3-none-any.whl", hash = 
"sha256:66f342cc6ac9818fc6ff340576acd24d65ba0b3efabb2b4ac08b598965a4a2f1", size = 26269 }, ] +[[package]] +name = "importlib-resources" +version = "6.4.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/be/f3e8c6081b684f176b761e6a2fef02a0be939740ed6f54109a2951d806f3/importlib_resources-6.4.5.tar.gz", hash = "sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065", size = 43372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/6a/4604f9ae2fa62ef47b9de2fa5ad599589d28c9fd1d335f32759813dfa91e/importlib_resources-6.4.5-py3-none-any.whl", hash = "sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717", size = 36115 }, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -1988,6 +2220,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409", size = 28965 }, ] +[[package]] +name = "kubernetes" +version = "31.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "durationpy" }, + { name = "google-auth" }, + { name = "oauthlib" }, + { name = "python-dateutil" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "requests-oauthlib" }, + { name = "six" }, + { name = "urllib3" }, + { name = "websocket-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7e/bd/ffcd3104155b467347cd9b3a64eb24182e459579845196b3a200569c8912/kubernetes-31.0.0.tar.gz", hash = "sha256:28945de906c8c259c1ebe62703b56a03b714049372196f854105afe4e6d014c0", size = 916096 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/a8/17f5e28cecdbd6d48127c22abdb794740803491f422a11905c4569d8e139/kubernetes-31.0.0-py2.py3-none-any.whl", hash = 
"sha256:bf141e2d380c8520eada8b351f4e319ffee9636328c137aa432bc486ca1200e1", size = 1857013 }, +] + [[package]] name = "langchain-core" version = "0.3.6" @@ -2647,6 +2901,96 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/29/3d/4d0dcaf194bfcdf83e076d6609b26ea3a3474b7c9ad19fdca3977c1367c3/mistralai-1.0.3-py3-none-any.whl", hash = "sha256:64af7c9192e64dc66b2da6d1c4d54a1324a881c21665a2f93d6b35d9de9f87c8", size = 216251 }, ] +[[package]] +name = "mmh3" +version = "5.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e2/08/04ad6419f072ea3f51f9a0f429dd30f5f0a0b02ead7ca11a831117b6f9e8/mmh3-5.0.1.tar.gz", hash = "sha256:7dab080061aeb31a6069a181f27c473a1f67933854e36a3464931f2716508896", size = 32008 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/b9/9a91b0a0e330557cdbf51fc43ca0ba306633f2ec6d2b15e871e288592a32/mmh3-5.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f0a4b4bf05778ed77d820d6e7d0e9bd6beb0c01af10e1ce9233f5d2f814fcafa", size = 52867 }, + { url = "https://files.pythonhosted.org/packages/da/28/6b37f0d6707872764e1af49f327b0940b6a3ad995d91b3839b90ba35f559/mmh3-5.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac7a391039aeab95810c2d020b69a94eb6b4b37d4e2374831e92db3a0cdf71c6", size = 38352 }, + { url = "https://files.pythonhosted.org/packages/76/84/a98f59a620b522f218876a0630b02fc345ecf078f6393595756ddb3aa0b5/mmh3-5.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3a2583b5521ca49756d8d8bceba80627a9cc295f255dcab4e3df7ccc2f09679a", size = 38214 }, + { url = "https://files.pythonhosted.org/packages/35/cb/4980c7eb6cd31f49d1913a4066562bc9e0af28526750f1232be9688a9cd4/mmh3-5.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:081a8423fe53c1ac94f87165f3e4c500125d343410c1a0c5f1703e898a3ef038", size = 93502 }, + { url = 
"https://files.pythonhosted.org/packages/65/f3/29726296fadeaf06134a6978f7c453dfa562cf2f0f1faf9ae28b9b8ef76e/mmh3-5.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8b4d72713799755dc8954a7d36d5c20a6c8de7b233c82404d122c7c7c1707cc", size = 98394 }, + { url = "https://files.pythonhosted.org/packages/35/fd/e181f4f4b250f7b63ee27a7d65e5e290a3ea0e26cc633f4bfd906f04558b/mmh3-5.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:389a6fd51efc76d3182d36ec306448559c1244f11227d2bb771bdd0e6cc91321", size = 98052 }, + { url = "https://files.pythonhosted.org/packages/61/5c/8a5d838da3eb3fb91035ef5eaaea469abab4e8e3fae55607c27a1a07d162/mmh3-5.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39f4128edaa074bff721b1d31a72508cba4d2887ee7867f22082e1fe9d4edea0", size = 86320 }, + { url = "https://files.pythonhosted.org/packages/10/80/3f33a8f4de12cea322607da1a84d001513affb741b3c3cc1277ecb85d34b/mmh3-5.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d5d23a94d91aabba3386b3769048d5f4210fdfef80393fece2f34ba5a7b466c", size = 93232 }, + { url = "https://files.pythonhosted.org/packages/9e/1c/d0ce5f498493be4de2e7e7596e1cbf63315a4c0bb8bb94e3c37c4fad965d/mmh3-5.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:16347d038361f8b8f24fd2b7ef378c9b68ddee9f7706e46269b6e0d322814713", size = 93590 }, + { url = "https://files.pythonhosted.org/packages/d9/66/770b5ad35b5a2eb7965f3fcaeaa76148e59543575d2e27b80690c1b0795c/mmh3-5.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:6e299408565af7d61f2d20a5ffdd77cf2ed902460fe4e6726839d59ba4b72316", size = 88433 }, + { url = "https://files.pythonhosted.org/packages/14/58/e0d258b18749d8640233976493716a40aa27352dcb1cea941836357dac24/mmh3-5.0.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:42050af21ddfc5445ee5a66e73a8fc758c71790305e3ee9e4a85a8e69e810f94", size 
= 99339 }, + { url = "https://files.pythonhosted.org/packages/38/26/7267146122deb584cf377975b994d80c6d72c4c8d0e8eedff4d0cc5cd4c8/mmh3-5.0.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2ae9b1f5ef27ec54659920f0404b7ceb39966e28867c461bfe83a05e8d18ddb0", size = 93944 }, + { url = "https://files.pythonhosted.org/packages/8d/6b/df60b14a2dd383d8848f6f35496c86c7003be3ffb236789e98d002c542c6/mmh3-5.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:50c2495a02045f3047d71d4ae9cdd7a15efc0bcbb7ff17a18346834a8e2d1d19", size = 92798 }, + { url = "https://files.pythonhosted.org/packages/0a/3f/d5fecf13915163a15b449e5cc89232a4df90e836ecad1c38121318119d27/mmh3-5.0.1-cp310-cp310-win32.whl", hash = "sha256:c028fa77cddf351ca13b4a56d43c1775652cde0764cadb39120b68f02a23ecf6", size = 39185 }, + { url = "https://files.pythonhosted.org/packages/74/8e/4bb5ade332a87de633cda21dae09d6002d69601f2b93e9f40302ab2d9acf/mmh3-5.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c5e741e421ec14400c4aae30890515c201f518403bdef29ae1e00d375bb4bbb5", size = 39766 }, + { url = "https://files.pythonhosted.org/packages/16/2b/cd5cfa4d7ad40a37655af491f9270909d63fc27bcf0558ec36000ee5347f/mmh3-5.0.1-cp310-cp310-win_arm64.whl", hash = "sha256:b17156d56fabc73dbf41bca677ceb6faed435cc8544f6566d72ea77d8a17e9d0", size = 36540 }, + { url = "https://files.pythonhosted.org/packages/fb/8a/f3b9cf8b7110fef0f130158d7602af6f5b09f2cf568130814b7c92e2507b/mmh3-5.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a6d5a9b1b923f1643559ba1fc0bf7a5076c90cbb558878d3bf3641ce458f25d", size = 52867 }, + { url = "https://files.pythonhosted.org/packages/bf/06/f466e0da3c5bd6fbb1e047f70fd4e9e9563d0268aa56de511f363478dbf2/mmh3-5.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3349b968be555f7334bbcce839da98f50e1e80b1c615d8e2aa847ea4a964a012", size = 38349 }, + { url = "https://files.pythonhosted.org/packages/13/f0/2d3daca276a4673f82af859e4b0b18befd4e6e54f1017ba48ea9735b2f1b/mmh3-5.0.1-cp311-cp311-macosx_11_0_arm64.whl", 
hash = "sha256:1bd3c94b110e55db02ab9b605029f48a2f7f677c6e58c09d44e42402d438b7e1", size = 38211 }, + { url = "https://files.pythonhosted.org/packages/e3/56/a2d203ca97702d4e045ac1a46a608393da1a1dddb24f81de664dae940518/mmh3-5.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d47ba84d48608f79adbb10bb09986b6dc33eeda5c2d1bd75d00820081b73bde9", size = 95104 }, + { url = "https://files.pythonhosted.org/packages/ec/45/c7c8ae64e3ae024776a0ce5377c16c6741a3359f3e9505fc35fc5012beb2/mmh3-5.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c0217987a8b8525c8d9170f66d036dec4ab45cfbd53d47e8d76125791ceb155e", size = 100049 }, + { url = "https://files.pythonhosted.org/packages/d5/74/681113776fe406c09870ab2152ffbd214a15bbc8f1d1da9ad73ce594b878/mmh3-5.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2797063a34e78d1b61639a98b0edec1c856fa86ab80c7ec859f1796d10ba429", size = 99671 }, + { url = "https://files.pythonhosted.org/packages/bf/4f/dbb8be18ce9b6ff8df14bc14348c0404b3091fb51df9c673ebfcf5877db3/mmh3-5.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8bba16340adcbd47853a2fbe5afdb397549e8f2e79324ff1dced69a3f8afe7c3", size = 87549 }, + { url = "https://files.pythonhosted.org/packages/5f/82/274d646f3f604c35b7e3d4eb7f3ff08b3bdc6a2c87d797709bb6f084a611/mmh3-5.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:282797957c9f60b51b9d768a602c25f579420cc9af46feb77d457a27823d270a", size = 94780 }, + { url = "https://files.pythonhosted.org/packages/c9/a1/f094ca8b8fb5e2ac53201070bda42b0fee80ceb92c153eb99a1453e3aed3/mmh3-5.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e4fb670c29e63f954f9e7a2cdcd57b36a854c2538f579ef62681ccbaa1de2b69", size = 90430 }, + { url = 
"https://files.pythonhosted.org/packages/d9/23/4732ba68c6ef7242b69bb53b9e1bcb2ef065d68ed85fd26e829fb911ab5a/mmh3-5.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ee7d85438dc6aff328e19ab052086a3c29e8a9b632998a49e5c4b0034e9e8d6", size = 89451 }, + { url = "https://files.pythonhosted.org/packages/3c/c5/daea5d534fcf20b2399c2a7b1cd00a8d29d4d474247c15c2c94548a1a272/mmh3-5.0.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:b7fb5db231f3092444bc13901e6a8d299667126b00636ffbad4a7b45e1051e2f", size = 94703 }, + { url = "https://files.pythonhosted.org/packages/5e/4a/34d5691e7be7c63c34181387bc69bdcc0005ca93c8b562d68cb5775e0e78/mmh3-5.0.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c100dd441703da5ec136b1d9003ed4a041d8a1136234c9acd887499796df6ad8", size = 91054 }, + { url = "https://files.pythonhosted.org/packages/5c/3a/ab31bb5e9e1a19a4a997593cbe6ce56710308218ff36c7f76d40ff9c8d2e/mmh3-5.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:71f3b765138260fd7a7a2dba0ea5727dabcd18c1f80323c9cfef97a7e86e01d0", size = 89571 }, + { url = "https://files.pythonhosted.org/packages/0b/79/b986bb067dbfcba6879afe6e723aad1bd53f223450532dd9a4606d0af389/mmh3-5.0.1-cp311-cp311-win32.whl", hash = "sha256:9a76518336247fd17689ce3ae5b16883fd86a490947d46a0193d47fb913e26e3", size = 39187 }, + { url = "https://files.pythonhosted.org/packages/48/69/97029eda3df0f84edde16a496a2e71bac508fc5d1f0a31e163da071e2670/mmh3-5.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:336bc4df2e44271f1c302d289cc3d78bd52d3eed8d306c7e4bff8361a12bf148", size = 39766 }, + { url = "https://files.pythonhosted.org/packages/c7/51/538f2b8412303281d8ce2a9a5c4ea84ff81f06de98af0b7c72059727a3bb/mmh3-5.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:af6522722fbbc5999aa66f7244d0986767a46f1fb05accc5200f75b72428a508", size = 36540 }, + { url = "https://files.pythonhosted.org/packages/75/c7/5b52d0882e7c0dccfaf8786a648e2b26c5307c594abe5cbe98c092607c97/mmh3-5.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:f2730bb263ed9c388e8860438b057a53e3cc701134a6ea140f90443c4c11aa40", size = 52907 }, + { url = "https://files.pythonhosted.org/packages/01/b5/9609fa353c27188292748db033323c206f3fc6fbfa124bccf6a42af0da08/mmh3-5.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6246927bc293f6d56724536400b85fb85f5be26101fa77d5f97dd5e2a4c69bf2", size = 38389 }, + { url = "https://files.pythonhosted.org/packages/33/99/49bf3c86244857b3b250c2f54aff22a5a78ef12258af556fa39bb1e80699/mmh3-5.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fbca322519a6e6e25b6abf43e940e1667cf8ea12510e07fb4919b48a0cd1c411", size = 38204 }, + { url = "https://files.pythonhosted.org/packages/f8/04/8860cab35b48aaefe40cf88344437e79ddc93cf7ff745dacd1cd56a2be1e/mmh3-5.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eae8c19903ed8a1724ad9e67e86f15d198a7a1271a4f9be83d47e38f312ed672", size = 95091 }, + { url = "https://files.pythonhosted.org/packages/fa/e9/4ac56001a5bab6d26aa3dfabeddea6d7f037fd2972c76803259f51a5af75/mmh3-5.0.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a09fd6cc72c07c0c07c3357714234b646d78052487c4a3bd5f7f6e08408cff60", size = 100055 }, + { url = "https://files.pythonhosted.org/packages/18/e8/7d5fd73f559c423ed5b72f940130c27803a406ee0ffc32ef5422f733df67/mmh3-5.0.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2ff8551fee7ae3b11c5d986b6347ade0dccaadd4670ffdb2b944dee120ffcc84", size = 99764 }, + { url = "https://files.pythonhosted.org/packages/54/d8/c0d89da6c729feec997a9b3b68698894cef12359ade0da95eba9e03b1d5d/mmh3-5.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e39694c73a5a20c8bf36dfd8676ed351e5234d55751ba4f7562d85449b21ef3f", size = 87650 }, + { url = 
"https://files.pythonhosted.org/packages/dd/41/ec0ee3fd5124c83cb767dcea8569bb326f8981cc88c991e3e4e948a31e24/mmh3-5.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eba6001989a92f72a89c7cf382fda831678bd780707a66b4f8ca90239fdf2123", size = 94976 }, + { url = "https://files.pythonhosted.org/packages/8e/fa/e8059199fe6fbb2fd6494302904cb1209b2f8b6899d58059858a280e89a5/mmh3-5.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0771f90c9911811cc606a5c7b7b58f33501c9ee896ed68a6ac22c7d55878ecc0", size = 90485 }, + { url = "https://files.pythonhosted.org/packages/3a/a0/eb9da5f93dea3f44b8e970f013279d1543ab210ccf63bb030830968682aa/mmh3-5.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:09b31ed0c0c0920363e96641fac4efde65b1ab62b8df86293142f35a254e72b4", size = 89554 }, + { url = "https://files.pythonhosted.org/packages/e7/e8/5803181eac4e015b4caf307af22fea74292dca48e580d93afe402dcdc138/mmh3-5.0.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5cf4a8deda0235312db12075331cb417c4ba163770edfe789bde71d08a24b692", size = 94872 }, + { url = "https://files.pythonhosted.org/packages/ed/f9/4d55063f9dcaed41524f078a85989efdf1d335159af5e70af29942ebae67/mmh3-5.0.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:41f7090a95185ef20ac018581a99337f0cbc84a2135171ee3290a9c0d9519585", size = 91326 }, + { url = "https://files.pythonhosted.org/packages/80/75/0a5acab5291480acd939db80e94448ac937fc7fbfddc0a67b3e721ebfc9c/mmh3-5.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b97b5b368fb7ff22194ec5854f5b12d8de9ab67a0f304728c7f16e5d12135b76", size = 89810 }, + { url = "https://files.pythonhosted.org/packages/9b/fd/eb1a3573cda74d4c2381d10ded62c128e869954ced1881c15e2bcd97a48f/mmh3-5.0.1-cp312-cp312-win32.whl", hash = "sha256:842516acf04da546f94fad52db125ee619ccbdcada179da51c326a22c4578cb9", size = 39206 }, + { url = 
"https://files.pythonhosted.org/packages/66/e8/542ed252924002b84c43a68a080cfd4facbea0d5df361e4f59637638d3c7/mmh3-5.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:d963be0dbfd9fca209c17172f6110787ebf78934af25e3694fe2ba40e55c1e2b", size = 39799 }, + { url = "https://files.pythonhosted.org/packages/bd/25/ff2cd36c82a23afa57a05cdb52ab467a911fb12c055c8a8238c0d426cbf0/mmh3-5.0.1-cp312-cp312-win_arm64.whl", hash = "sha256:a5da292ceeed8ce8e32b68847261a462d30fd7b478c3f55daae841404f433c15", size = 36537 }, + { url = "https://files.pythonhosted.org/packages/09/e0/fb19c46265c18311b422ba5ce3e18046ad45c48cfb213fd6dbec23ae6b51/mmh3-5.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:673e3f1c8d4231d6fb0271484ee34cb7146a6499fc0df80788adb56fd76842da", size = 52909 }, + { url = "https://files.pythonhosted.org/packages/c3/94/54fc591e7a24c7ce2c531ecfc5715cff932f9d320c2936550cc33d67304d/mmh3-5.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f795a306bd16a52ad578b663462cc8e95500b3925d64118ae63453485d67282b", size = 38396 }, + { url = "https://files.pythonhosted.org/packages/1f/9a/142bcc9d0d28fc8ae45bbfb83926adc069f984cdf3495a71534cc22b8e27/mmh3-5.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5ed57a5e28e502a1d60436cc25c76c3a5ba57545f250f2969af231dc1221e0a5", size = 38207 }, + { url = "https://files.pythonhosted.org/packages/f8/5b/f1c9110aa70321bb1ee713f17851b9534586c63bc25e0110e4fc03ae2450/mmh3-5.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:632c28e7612e909dbb6cbe2fe496201ada4695b7715584005689c5dc038e59ad", size = 94988 }, + { url = "https://files.pythonhosted.org/packages/87/e5/4dc67e7e0e716c641ab0a5875a659e37258417439590feff5c3bd3ff4538/mmh3-5.0.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:53fd6bd525a5985e391c43384672d9d6b317fcb36726447347c7fc75bfed34ec", size = 99969 }, + { url = 
"https://files.pythonhosted.org/packages/ac/68/d148327337687c53f04ad9ceaedfa9ad155ee0111d0cb06220f044d66720/mmh3-5.0.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dceacf6b0b961a0e499836af3aa62d60633265607aef551b2a3e3c48cdaa5edd", size = 99662 }, + { url = "https://files.pythonhosted.org/packages/13/79/782adb6df6397947c1097b1e94b7f8d95629a4a73df05cf7207bd5148c1f/mmh3-5.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f0738d478fdfb5d920f6aff5452c78f2c35b0eff72caa2a97dfe38e82f93da2", size = 87606 }, + { url = "https://files.pythonhosted.org/packages/f2/c2/0404383281df049d0e4ccf07fabd659fc1f3da834df6708d934116cbf45d/mmh3-5.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e70285e7391ab88b872e5bef632bad16b9d99a6d3ca0590656a4753d55988af", size = 94836 }, + { url = "https://files.pythonhosted.org/packages/c8/33/fda67c5f28e4c2131891cf8cbc3513cfc55881e3cfe26e49328e38ffacb3/mmh3-5.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:27e5fc6360aa6b828546a4318da1a7da6bf6e5474ccb053c3a6aa8ef19ff97bd", size = 90492 }, + { url = "https://files.pythonhosted.org/packages/64/2f/0ed38aefe2a87f30bb1b12e5b75dc69fcffdc16def40d1752d6fc7cbbf96/mmh3-5.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7989530c3c1e2c17bf5a0ec2bba09fd19819078ba90beedabb1c3885f5040b0d", size = 89594 }, + { url = "https://files.pythonhosted.org/packages/95/ab/6e7a5e765fc78e3dbd0a04a04cfdf72e91eb8e31976228e69d82c741a5b4/mmh3-5.0.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:cdad7bee649950da7ecd3cbbbd12fb81f1161072ecbdb5acfa0018338c5cb9cf", size = 94929 }, + { url = "https://files.pythonhosted.org/packages/74/51/f748f00c072006f4a093d9b08853a0e2e3cd5aeaa91343d4e2d942851978/mmh3-5.0.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e143b8f184c1bb58cecd85ab4a4fd6dc65a2d71aee74157392c3fddac2a4a331", size = 91317 }, + { url = 
"https://files.pythonhosted.org/packages/df/a1/21ee8017a7feb0270c49f756ff56da9f99bd150dcfe3b3f6f0d4b243423d/mmh3-5.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e5eb12e886f3646dd636f16b76eb23fc0c27e8ff3c1ae73d4391e50ef60b40f6", size = 89861 }, + { url = "https://files.pythonhosted.org/packages/c2/d2/46a6d070de4659bdf91cd6a62d659f8cc547dadee52b6d02bcbacb3262ed/mmh3-5.0.1-cp313-cp313-win32.whl", hash = "sha256:16e6dddfa98e1c2d021268e72c78951234186deb4df6630e984ac82df63d0a5d", size = 39201 }, + { url = "https://files.pythonhosted.org/packages/ed/07/316c062f09019b99b248a4183c5333f8eeebe638345484774908a8f2c9c0/mmh3-5.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:d3ffb792d70b8c4a2382af3598dad6ae0c5bd9cee5b7ffcc99aa2f5fd2c1bf70", size = 39807 }, + { url = "https://files.pythonhosted.org/packages/9d/d3/f7e6d7d062b8d7072c3989a528d9d47486ee5d5ae75250f6e26b4976d098/mmh3-5.0.1-cp313-cp313-win_arm64.whl", hash = "sha256:122fa9ec148383f9124292962bda745f192b47bfd470b2af5fe7bb3982b17896", size = 36539 }, +] + +[[package]] +name = "monotonic" +version = "1.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/ca/8e91948b782ddfbd194f323e7e7d9ba12e5877addf04fb2bf8fca38e86ac/monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7", size = 7615 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/67/7e8406a29b6c45be7af7740456f7f37025f0506ae2e05fb9009a53946860/monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c", size = 8154 }, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106 } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 }, +] + [[package]] name = "msal" version = "1.30.0" @@ -3012,6 +3356,165 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/2e/86f24451c2d530c88daf997cb8d6ac622c1d40d19f5a031ed68a4b73a374/numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", size = 15517754 }, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.4.5.8" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/7f/7fbae15a3982dc9595e49ce0f19332423b260045d0a6afe93cdbe2f1f624/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3", size = 363333771 }, + { url = "https://files.pythonhosted.org/packages/ae/71/1c91302526c45ab494c23f61c7a84aa568b8c1f9d196efa5993957faf906/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b", size = 363438805 }, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.4.127" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/93/b5/9fb3d00386d3361b03874246190dfec7b206fd74e6e287b26a8fcb359d95/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a", size = 12354556 }, + { url = "https://files.pythonhosted.org/packages/67/42/f4f60238e8194a3106d06a058d494b18e006c10bb2b915655bd9f6ea4cb1/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb", size = 13813957 }, +] 
+ +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.4.127" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/aa/083b01c427e963ad0b314040565ea396f914349914c298556484f799e61b/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198", size = 24133372 }, + { url = "https://files.pythonhosted.org/packages/2c/14/91ae57cd4db3f9ef7aa99f4019cfa8d54cb4caa7e00975df6467e9725a9f/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338", size = 24640306 }, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.4.127" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/aa/b656d755f474e2084971e9a297def515938d56b466ab39624012070cb773/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3", size = 894177 }, + { url = "https://files.pythonhosted.org/packages/ea/27/1795d86fe88ef397885f2e580ac37628ed058a92ed2c39dc8eac3adf0619/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5", size = 883737 }, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.1.0.70" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = 
"sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 }, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.2.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/8a/0e728f749baca3fbeffad762738276e5df60851958be7783af121a7221e7/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399", size = 211422548 }, + { url = "https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117 }, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.5.147" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/80/9c/a79180e4d70995fdf030c6946991d0171555c6edf95c265c6b2bf7011112/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9", size = 56314811 }, + { url = "https://files.pythonhosted.org/packages/8a/6d/44ad094874c6f1b9c654f8ed939590bdc408349f137f9b98a3a23ccec411/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b", size = 56305206 }, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.6.1.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 
'Linux')" }, + { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/6b/a5c33cf16af09166845345275c34ad2190944bcc6026797a39f8e0a282e0/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e", size = 127634111 }, + { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057 }, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.3.1.170" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/a9/c0d2f83a53d40a4a41be14cea6a0bf9e668ffcf8b004bd65633f433050c0/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3", size = 207381987 }, + { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763 }, +] + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.21.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/df/99/12cd266d6233f47d00daf3a72739872bdc10267d0383508b0b9c84a18bb6/nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0", size = 188654414 }, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.4.127" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/45/239d52c05074898a80a900f49b1615d81c07fceadd5ad6c4f86a987c0bc4/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83", size = 20552510 }, + { url = "https://files.pythonhosted.org/packages/ff/ff/847841bacfbefc97a00036e0fce5a0f086b640756dc38caea5e1bb002655/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57", size = 21066810 }, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.4.127" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/39/471f581edbb7804b39e8063d92fc8305bdc7a80ae5c07dbe6ea5c50d14a5/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3", size = 100417 }, + { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 }, +] + +[[package]] +name = "oauthlib" +version = "3.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/fa/fbf4001037904031639e6bfbfc02badfc7e12f137a8afa254df6c4c8a670/oauthlib-3.2.2.tar.gz", hash = 
"sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918", size = 177352 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/80/cab10959dc1faead58dc8384a781dfbf93cb4d33d50988f7a69f1b7c9bbe/oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", size = 151688 }, +] + +[[package]] +name = "onnxruntime" +version = "1.19.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coloredlogs" }, + { name = "flatbuffers" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "protobuf" }, + { name = "sympy" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/18/272d3d7406909141d3c9943796e3e97cafa53f4342d9231c0cfd8cb05702/onnxruntime-1.19.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:84fa57369c06cadd3c2a538ae2a26d76d583e7c34bdecd5769d71ca5c0fc750e", size = 16776408 }, + { url = "https://files.pythonhosted.org/packages/d8/d3/eb93f4ae511cfc725d0c69e07008800f8ac018de19ea1e497b306f174ccc/onnxruntime-1.19.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdc471a66df0c1cdef774accef69e9f2ca168c851ab5e4f2f3341512c7ef4666", size = 11491779 }, + { url = "https://files.pythonhosted.org/packages/ca/4b/ce5958074abe4b6e8d1da9c10e443e01a681558a9ec17e5cc7619438e094/onnxruntime-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e3a4ce906105d99ebbe817f536d50a91ed8a4d1592553f49b3c23c4be2560ae6", size = 13170428 }, + { url = "https://files.pythonhosted.org/packages/ce/0f/6df82dfe02467d12adbaa05c2bd17519c29c7df531ed600231f0c741ad22/onnxruntime-1.19.2-cp310-cp310-win32.whl", hash = "sha256:4b3d723cc154c8ddeb9f6d0a8c0d6243774c6b5930847cc83170bfe4678fafb3", size = 9591305 }, + { url = "https://files.pythonhosted.org/packages/3c/d8/68b63dc86b502169d017a86fe8bc718f4b0055ef1f6895bfaddd04f2eead/onnxruntime-1.19.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:17ed7382d2c58d4b7354fb2b301ff30b9bf308a1c7eac9546449cd122d21cae5", size = 11084902 }, + { url = "https://files.pythonhosted.org/packages/f0/ff/77bee5df55f034ee81d2e1bc58b2b8511b9c54f06ce6566cb562c5d95aa5/onnxruntime-1.19.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:d863e8acdc7232d705d49e41087e10b274c42f09e259016a46f32c34e06dc4fd", size = 16779187 }, + { url = "https://files.pythonhosted.org/packages/f3/78/e29f5fb76e0f6524f3520e8e5b9d53282784b45d14068c5112db9f712b0a/onnxruntime-1.19.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c1dfe4f660a71b31caa81fc298a25f9612815215a47b286236e61d540350d7b6", size = 11496005 }, + { url = "https://files.pythonhosted.org/packages/60/ce/be4152da5c1030ab5a159a4a792ed9abad6ba498d79ef0aeba593ff7b5bf/onnxruntime-1.19.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a36511dc07c5c964b916697e42e366fa43c48cdb3d3503578d78cef30417cb84", size = 13167809 }, + { url = "https://files.pythonhosted.org/packages/e1/00/9740a074eb0e0a21ff13a2c4f32aecc5b21110b2c9b9177d8ac132b66e2d/onnxruntime-1.19.2-cp311-cp311-win32.whl", hash = "sha256:50cbb8dc69d6befad4746a69760e5b00cc3ff0a59c6c3fb27f8afa20e2cab7e7", size = 9591445 }, + { url = "https://files.pythonhosted.org/packages/1e/f5/9d995a685f97508b3254f17015b4a78641b0625e79480a7aed7a7a105d7c/onnxruntime-1.19.2-cp311-cp311-win_amd64.whl", hash = "sha256:1c3e5d415b78337fa0b1b75291e9ea9fb2a4c1f148eb5811e7212fed02cfffa8", size = 11085695 }, + { url = "https://files.pythonhosted.org/packages/f2/a5/2a02687a88fc8a2507bef65876c90e96b9f8de5ba1f810acbf67c140fc67/onnxruntime-1.19.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:68e7051bef9cfefcbb858d2d2646536829894d72a4130c24019219442b1dd2ed", size = 16790434 }, + { url = "https://files.pythonhosted.org/packages/47/64/da42254ec14452cad2cdd4cf407094841c0a378c0d08944e9a36172197e9/onnxruntime-1.19.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:d2d366fbcc205ce68a8a3bde2185fd15c604d9645888703785b61ef174265168", size = 11486028 }, + { url = "https://files.pythonhosted.org/packages/b2/92/3574f6836f33b1b25f272293e72538c38451b12c2d9aa08630bb6bc0f057/onnxruntime-1.19.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:477b93df4db467e9cbf34051662a4b27c18e131fa1836e05974eae0d6e4cf29b", size = 13175054 }, + { url = "https://files.pythonhosted.org/packages/ff/c9/8c37e413a830cac7f7dc094fffbd0c998c8bcb66a6f0b0a3201a49bc742b/onnxruntime-1.19.2-cp312-cp312-win32.whl", hash = "sha256:9a174073dc5608fad05f7cf7f320b52e8035e73d80b0a23c80f840e5a97c0147", size = 9592681 }, + { url = "https://files.pythonhosted.org/packages/44/c0/59768846533786a82cafb38d8d2f900ad666bc91f0ae634774d286fa3c47/onnxruntime-1.19.2-cp312-cp312-win_amd64.whl", hash = "sha256:190103273ea4507638ffc31d66a980594b237874b65379e273125150eb044857", size = 11086411 }, +] + [[package]] name = "openai" version = "1.43.0" @@ -3056,6 +3559,94 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/1f/737dcdbc9fea2fa96c1b392ae47275165a7c641663fbb08a8d252968eed2/opentelemetry_api-1.27.0-py3-none-any.whl", hash = "sha256:953d5871815e7c30c81b56d910c707588000fff7a3ca1c73e6531911d53065e7", size = 63970 }, ] +[[package]] +name = "opentelemetry-exporter-otlp-proto-common" +version = "1.27.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-proto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/2e/7eaf4ba595fb5213cf639c9158dfb64aacb2e4c7d74bfa664af89fa111f4/opentelemetry_exporter_otlp_proto_common-1.27.0.tar.gz", hash = "sha256:159d27cf49f359e3798c4c3eb8da6ef4020e292571bd8c5604a2a573231dd5c8", size = 17860 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/27/4610ab3d9bb3cde4309b6505f98b3aabca04a26aa480aa18cede23149837/opentelemetry_exporter_otlp_proto_common-1.27.0-py3-none-any.whl", hash = 
"sha256:675db7fffcb60946f3a5c43e17d1168a3307a94a930ecf8d2ea1f286f3d4f79a", size = 17848 }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-grpc" +version = "1.27.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "googleapis-common-protos" }, + { name = "grpcio" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/d0/c1e375b292df26e0ffebf194e82cd197e4c26cc298582bda626ce3ce74c5/opentelemetry_exporter_otlp_proto_grpc-1.27.0.tar.gz", hash = "sha256:af6f72f76bcf425dfb5ad11c1a6d6eca2863b91e63575f89bb7b4b55099d968f", size = 26244 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/80/32217460c2c64c0568cea38410124ff680a9b65f6732867bbf857c4d8626/opentelemetry_exporter_otlp_proto_grpc-1.27.0-py3-none-any.whl", hash = "sha256:56b5bbd5d61aab05e300d9d62a6b3c134827bbd28d0b12f2649c2da368006c9e", size = 18541 }, +] + +[[package]] +name = "opentelemetry-instrumentation" +version = "0.48b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "setuptools" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/0e/d9394839af5d55c8feb3b22cd11138b953b49739b20678ca96289e30f904/opentelemetry_instrumentation-0.48b0.tar.gz", hash = "sha256:94929685d906380743a71c3970f76b5f07476eea1834abd5dd9d17abfe23cc35", size = 24724 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/7f/405c41d4f359121376c9d5117dcf68149b8122d3f6c718996d037bd4d800/opentelemetry_instrumentation-0.48b0-py3-none-any.whl", hash = "sha256:a69750dc4ba6a5c3eb67986a337185a25b739966d80479befe37b546fc870b44", size = 29449 }, +] + +[[package]] +name = "opentelemetry-instrumentation-asgi" +version = "0.48b0" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "asgiref" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/44/ac/fd3d40bab3234ec3f5c052a815100676baaae1832fa1067935f11e5c59c6/opentelemetry_instrumentation_asgi-0.48b0.tar.gz", hash = "sha256:04c32174b23c7fa72ddfe192dad874954968a6a924608079af9952964ecdf785", size = 23435 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/74/a0e0d38622856597dd8e630f2bd793760485eb165708e11b8be1696bbb5a/opentelemetry_instrumentation_asgi-0.48b0-py3-none-any.whl", hash = "sha256:ddb1b5fc800ae66e85a4e2eca4d9ecd66367a8c7b556169d9e7b57e10676e44d", size = 15958 }, +] + +[[package]] +name = "opentelemetry-instrumentation-fastapi" +version = "0.48b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-instrumentation-asgi" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/20/43477da5850ef2cd3792715d442aecd051e885e0603b6ee5783b2104ba8f/opentelemetry_instrumentation_fastapi-0.48b0.tar.gz", hash = "sha256:21a72563ea412c0b535815aeed75fc580240f1f02ebc72381cfab672648637a2", size = 18497 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/50/745ab075a3041b7a5f29a579d2c28eaad54f64b4589d8f9fd364c62cf0f3/opentelemetry_instrumentation_fastapi-0.48b0-py3-none-any.whl", hash = "sha256:afeb820a59e139d3e5d96619600f11ce0187658b8ae9e3480857dd790bc024f2", size = 11777 }, +] + +[[package]] +name = "opentelemetry-proto" +version = "1.27.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/9a/59/959f0beea798ae0ee9c979b90f220736fbec924eedbefc60ca581232e659/opentelemetry_proto-1.27.0.tar.gz", hash = "sha256:33c9345d91dafd8a74fc3d7576c5a38f18b7fdf8d02983ac67485386132aedd6", size = 34749 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/56/3d2d826834209b19a5141eed717f7922150224d1a982385d19a9444cbf8d/opentelemetry_proto-1.27.0-py3-none-any.whl", hash = "sha256:b133873de5581a50063e1e4b29cdcf0c5e253a8c2d8dc1229add20a4c3830ace", size = 52464 }, +] + [[package]] name = "opentelemetry-sdk" version = "1.27.0" @@ -3083,6 +3674,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/7a/4f0063dbb0b6c971568291a8bc19a4ca70d3c185db2d956230dd67429dfc/opentelemetry_semantic_conventions-0.48b0-py3-none-any.whl", hash = "sha256:a0de9f45c413a8669788a38569c7e0a11ce6ce97861a628cca785deecdc32a1f", size = 149685 }, ] +[[package]] +name = "opentelemetry-util-http" +version = "0.48b0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/d7/185c494754340e0a3928fd39fde2616ee78f2c9d66253affaad62d5b7935/opentelemetry_util_http-0.48b0.tar.gz", hash = "sha256:60312015153580cc20f322e5cdc3d3ecad80a71743235bdb77716e742814623c", size = 7863 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/2e/36097c0a4d0115b8c7e377c90bab7783ac183bc5cb4071308f8959454311/opentelemetry_util_http-0.48b0-py3-none-any.whl", hash = "sha256:76f598af93aab50328d2a69c786beaedc8b6a7770f7a818cc307eb353debfffb", size = 6946 }, +] + [[package]] name = "orjson" version = "3.10.7" @@ -3139,6 +3739,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/55/8b/5ab7257531a5d830fc8000c476e63c935488d74609b50f9384a643ec0a62/outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b", size = 10692 }, ] +[[package]] +name = "overrides" +version = "7.7.0" +source = { registry = "https://pypi.org/simple" } 
+sdist = { url = "https://files.pythonhosted.org/packages/36/86/b585f53236dec60aba864e050778b25045f857e17f6e5ea0ae95fe80edd2/overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a", size = 22812 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49", size = 17832 }, +] + [[package]] name = "packaging" version = "24.1" @@ -3386,6 +3995,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9b/fb/a70a4214956182e0d7a9099ab17d50bfcba1056188e9b14f35b9e2b62a0d/portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf", size = 18423 }, ] +[[package]] +name = "posthog" +version = "3.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backoff" }, + { name = "monotonic" }, + { name = "python-dateutil" }, + { name = "requests" }, + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4c/7a/a6ab0d18f93255a4488196269ff53f2720821b169e1964cbf785d5f54b32/posthog-3.7.0.tar.gz", hash = "sha256:b095d4354ba23f8b346ab5daed8ecfc5108772f922006982dfe8b2d29ebc6e0e", size = 49661 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/11/a8d4283b324cda992fbb72611c46c5c68f87902a10383dba1bde91660cc6/posthog-3.7.0-py2.py3-none-any.whl", hash = "sha256:3555161c3a9557b5666f96d8e1f17f410ea0f07db56e399e336a1656d4e5c722", size = 54359 }, +] + [[package]] name = "prompt-toolkit" version = "3.0.47" @@ -3446,7 +4071,7 @@ name = "psycopg" version = "3.2.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions" }, { name = "tzdata", marker = "sys_platform == 'win32'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/d1/ad/7ce016ae63e231575df0498d2395d15f005f05e32d3a2d439038e1bd0851/psycopg-3.2.3.tar.gz", hash = "sha256:a5764f67c27bec8bfac85764d23c534af2c27b893550377e37ce59c12aac47a2", size = 155550 } @@ -3734,6 +4359,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3c/60/eccdd92dd4af3e4bea6d6a342f7588c618a15b9bec4b968af581e498bcc4/pypdf-4.3.1-py3-none-any.whl", hash = "sha256:64b31da97eda0771ef22edb1bfecd5deee4b72c3d1736b7df2689805076d6418", size = 295825 }, ] +[[package]] +name = "pypika" +version = "0.48.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/2c/94ed7b91db81d61d7096ac8f2d325ec562fc75e35f3baea8749c85b28784/PyPika-0.48.9.tar.gz", hash = "sha256:838836a61747e7c8380cd1b7ff638694b7a7335345d0f559b04b2cd832ad5378", size = 67259 } + +[[package]] +name = "pyproject-hooks" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/82/28175b2414effca1cdac8dc99f76d660e7a4fb0ceefa4b4ab8f5f6742925/pyproject_hooks-1.2.0.tar.gz", hash = "sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8", size = 19228 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl", hash = "sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913", size = 10216 }, +] + +[[package]] +name = "pyreadline3" +version = "3.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/49/4cea918a08f02817aabae639e3d0ac046fef9f9180518a3ad394e22da148/pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7", size = 99839 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178 }, +] + [[package]] name = "pyright" version = "1.1.378" @@ -4092,6 +4741,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/25/dd878a121fcfdf38f52850f11c512e13ec87c2ea72385933818e5b6c15ce/requests_file-2.1.0-py2.py3-none-any.whl", hash = "sha256:cf270de5a4c5874e84599fc5778303d496c10ae5e870bfa378818f35d21bda5c", size = 4244 }, ] +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "oauthlib" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179 }, +] + [[package]] name = "rich" version = "13.8.0" @@ -4213,6 +4875,101 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/f1/3db1590be946c14d86ac0cc8422e5808500903592b7ca09a097e425b1dba/ruff-0.4.8-py3-none-win_arm64.whl", hash = "sha256:14019a06dbe29b608f6b7cbcec300e3170a8d86efaddb7b23405cb7f7dcaf780", size = 7944828 }, ] +[[package]] +name = "safetensors" +version = "0.4.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/46/a1c56ed856c6ac3b1a8b37abe5be0cac53219367af1331e721b04d122577/safetensors-0.4.5.tar.gz", hash = "sha256:d73de19682deabb02524b3d5d1f8b3aaba94c72f1bbfc7911b9b9d5d391c0310", size = 65702 } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/38/10/0798ec2c8704c2d172620d8a3725bed92cdd75516357b1a3e64d4229ea4e/safetensors-0.4.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a63eaccd22243c67e4f2b1c3e258b257effc4acd78f3b9d397edc8cf8f1298a7", size = 392312 }, + { url = "https://files.pythonhosted.org/packages/2b/9e/9648d8dbb485c40a4a0212b7537626ae440b48156cc74601ca0b7a7615e0/safetensors-0.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:23fc9b4ec7b602915cbb4ec1a7c1ad96d2743c322f20ab709e2c35d1b66dad27", size = 381858 }, + { url = "https://files.pythonhosted.org/packages/8b/67/49556aeacc00df353767ed31d68b492fecf38c3f664c52692e4d92aa0032/safetensors-0.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6885016f34bef80ea1085b7e99b3c1f92cb1be78a49839203060f67b40aee761", size = 441382 }, + { url = "https://files.pythonhosted.org/packages/5d/ce/e9f4869a37bb11229e6cdb4e73a6ef23b4f360eee9dca5f7e40982779704/safetensors-0.4.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:133620f443450429322f238fda74d512c4008621227fccf2f8cf4a76206fea7c", size = 439001 }, + { url = "https://files.pythonhosted.org/packages/a0/27/aee8cf031b89c34caf83194ec6b7f2eed28d053fff8b6da6d00c85c56035/safetensors-0.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4fb3e0609ec12d2a77e882f07cced530b8262027f64b75d399f1504ffec0ba56", size = 478026 }, + { url = "https://files.pythonhosted.org/packages/da/33/1d9fc4805c623636e7d460f28eec92ebd1856f7a552df8eb78398a1ef4de/safetensors-0.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0f1dd769f064adc33831f5e97ad07babbd728427f98e3e1db6902e369122737", size = 495545 }, + { url = "https://files.pythonhosted.org/packages/b9/df/6f766b56690709d22e83836e4067a1109a7d84ea152a6deb5692743a2805/safetensors-0.4.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c6d156bdb26732feada84f9388a9f135528c1ef5b05fae153da365ad4319c4c5", size = 435016 }, + { url = "https://files.pythonhosted.org/packages/90/fa/7bc3f18086201b1e55a42c88b822ae197d0158e12c54cd45c887305f1b7e/safetensors-0.4.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e347d77e2c77eb7624400ccd09bed69d35c0332f417ce8c048d404a096c593b", size = 456273 }, + { url = "https://files.pythonhosted.org/packages/3e/59/2ae50150d37a65c1c5f01aec74dc737707b8bbecdc76307e5a1a12c8a376/safetensors-0.4.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9f556eea3aec1d3d955403159fe2123ddd68e880f83954ee9b4a3f2e15e716b6", size = 619669 }, + { url = "https://files.pythonhosted.org/packages/fe/43/10f0bb597aef62c9c154152e265057089f3c729bdd980e6c32c3ec2407a4/safetensors-0.4.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9483f42be3b6bc8ff77dd67302de8ae411c4db39f7224dec66b0eb95822e4163", size = 605212 }, + { url = "https://files.pythonhosted.org/packages/7c/75/ede6887ea0ceaba55730988bfc7668dc147a8758f907fa6db26fbb681b8e/safetensors-0.4.5-cp310-none-win32.whl", hash = "sha256:7389129c03fadd1ccc37fd1ebbc773f2b031483b04700923c3511d2a939252cc", size = 272652 }, + { url = "https://files.pythonhosted.org/packages/ba/f0/919c72a9eef843781e652d0650f2819039943e69b69d5af2d0451a23edc3/safetensors-0.4.5-cp310-none-win_amd64.whl", hash = "sha256:e98ef5524f8b6620c8cdef97220c0b6a5c1cef69852fcd2f174bb96c2bb316b1", size = 285879 }, + { url = "https://files.pythonhosted.org/packages/9a/a5/25bcf75e373412daf1fd88045ab3aa8140a0d804ef0e70712c4f2c5b94d8/safetensors-0.4.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:21f848d7aebd5954f92538552d6d75f7c1b4500f51664078b5b49720d180e47c", size = 392256 }, + { url = "https://files.pythonhosted.org/packages/08/8c/ece3bf8756506a890bd980eca02f47f9d98dfbf5ce16eda1368f53560f67/safetensors-0.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb07000b19d41e35eecef9a454f31a8b4718a185293f0d0b1c4b61d6e4487971", size = 381490 }, + { 
url = "https://files.pythonhosted.org/packages/39/83/c4a7ce01d626e46ea2b45887f2e59b16441408031e2ce2f9fe01860c6946/safetensors-0.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09dedf7c2fda934ee68143202acff6e9e8eb0ddeeb4cfc24182bef999efa9f42", size = 441093 }, + { url = "https://files.pythonhosted.org/packages/47/26/cc52de647e71bd9a0b0d78ead0d31d9c462b35550a817aa9e0cab51d6db4/safetensors-0.4.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:59b77e4b7a708988d84f26de3ebead61ef1659c73dcbc9946c18f3b1786d2688", size = 438960 }, + { url = "https://files.pythonhosted.org/packages/06/78/332538546775ee97e749867df2d58f2282d9c48a1681e4891eed8b94ec94/safetensors-0.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d3bc83e14d67adc2e9387e511097f254bd1b43c3020440e708858c684cbac68", size = 478031 }, + { url = "https://files.pythonhosted.org/packages/d9/03/a3c8663f1ddda54e624ecf43fce651659b49e8e1603c52c3e464b442acfa/safetensors-0.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39371fc551c1072976073ab258c3119395294cf49cdc1f8476794627de3130df", size = 494754 }, + { url = "https://files.pythonhosted.org/packages/e6/ee/69e498a892f208bd1da4104d4b9be887f8611bf4942144718b6738482250/safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6c19feda32b931cae0acd42748a670bdf56bee6476a046af20181ad3fee4090", size = 435013 }, + { url = "https://files.pythonhosted.org/packages/a2/61/f0cfce984515b86d1260f556ba3b782158e2855e6a318446ac2613786fa9/safetensors-0.4.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a659467495de201e2f282063808a41170448c78bada1e62707b07a27b05e6943", size = 455984 }, + { url = "https://files.pythonhosted.org/packages/e7/a9/3e3b48fcaade3eb4e347d39ebf0bd44291db21a3e4507854b42a7cb910ac/safetensors-0.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:bad5e4b2476949bcd638a89f71b6916fa9a5cae5c1ae7eede337aca2100435c0", size = 619513 }, + { url = "https://files.pythonhosted.org/packages/80/23/2a7a1be24258c0e44c1d356896fd63dc0545a98d2d0184925fa09cd3ec76/safetensors-0.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a3a315a6d0054bc6889a17f5668a73f94f7fe55121ff59e0a199e3519c08565f", size = 604841 }, + { url = "https://files.pythonhosted.org/packages/b4/5c/34d082ff1fffffd8545fb22cbae3285ab4236f1f0cfc64b7e58261c2363b/safetensors-0.4.5-cp311-none-win32.whl", hash = "sha256:a01e232e6d3d5cf8b1667bc3b657a77bdab73f0743c26c1d3c5dd7ce86bd3a92", size = 272602 }, + { url = "https://files.pythonhosted.org/packages/6d/41/948c96c8a7e9fef57c2e051f1871c108a6dbbc6d285598bdb1d89b98617c/safetensors-0.4.5-cp311-none-win_amd64.whl", hash = "sha256:cbd39cae1ad3e3ef6f63a6f07296b080c951f24cec60188378e43d3713000c04", size = 285973 }, + { url = "https://files.pythonhosted.org/packages/bf/ac/5a63082f931e99200db95fd46fb6734f050bb6e96bf02521904c6518b7aa/safetensors-0.4.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:473300314e026bd1043cef391bb16a8689453363381561b8a3e443870937cc1e", size = 392015 }, + { url = "https://files.pythonhosted.org/packages/73/95/ab32aa6e9bdc832ff87784cdf9da26192b93de3ef82b8d1ada8f345c5044/safetensors-0.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:801183a0f76dc647f51a2d9141ad341f9665602a7899a693207a82fb102cc53e", size = 381774 }, + { url = "https://files.pythonhosted.org/packages/d6/6c/7e04b7626809fc63f3698f4c50e43aff2864b40089aa4506c918a75b8eed/safetensors-0.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1524b54246e422ad6fb6aea1ac71edeeb77666efa67230e1faf6999df9b2e27f", size = 441134 }, + { url = "https://files.pythonhosted.org/packages/58/2b/ffe7c86a277e6c1595fbdf415cfe2903f253f574a5405e93fda8baaa582c/safetensors-0.4.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:b3139098e3e8b2ad7afbca96d30ad29157b50c90861084e69fcb80dec7430461", size = 438467 }, + { url = "https://files.pythonhosted.org/packages/67/9c/f271bd804e08c7fda954d17b70ff281228a88077337a9e70feace4f4cc93/safetensors-0.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65573dc35be9059770808e276b017256fa30058802c29e1038eb1c00028502ea", size = 476566 }, + { url = "https://files.pythonhosted.org/packages/4c/ad/4cf76a3e430a8a26108407fa6cb93e6f80d996a5cb75d9540c8fe3862990/safetensors-0.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd33da8e9407559f8779c82a0448e2133737f922d71f884da27184549416bfed", size = 492253 }, + { url = "https://files.pythonhosted.org/packages/d9/40/a6f75ea449a9647423ec8b6f72c16998d35aa4b43cb38536ac060c5c7bf5/safetensors-0.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3685ce7ed036f916316b567152482b7e959dc754fcc4a8342333d222e05f407c", size = 434769 }, + { url = "https://files.pythonhosted.org/packages/52/47/d4b49b1231abf3131f7bb0bc60ebb94b27ee33e0a1f9569da05f8ac65dee/safetensors-0.4.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dde2bf390d25f67908278d6f5d59e46211ef98e44108727084d4637ee70ab4f1", size = 457166 }, + { url = "https://files.pythonhosted.org/packages/c3/cd/006468b03b0fa42ff82d795d47c4193e99001e96c3f08bd62ef1b5cab586/safetensors-0.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7469d70d3de970b1698d47c11ebbf296a308702cbaae7fcb993944751cf985f4", size = 619280 }, + { url = "https://files.pythonhosted.org/packages/22/4d/b6208d918e83daa84b424c0ac3191ae61b44b3191613a3a5a7b38f94b8ad/safetensors-0.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3a6ba28118636a130ccbb968bc33d4684c48678695dba2590169d5ab03a45646", size = 605390 }, + { url = "https://files.pythonhosted.org/packages/e8/20/bf0e01825dc01ed75538021a98b9a046e60ead63c6c6700764c821a8c873/safetensors-0.4.5-cp312-none-win32.whl", hash = 
"sha256:c859c7ed90b0047f58ee27751c8e56951452ed36a67afee1b0a87847d065eec6", size = 273250 }, + { url = "https://files.pythonhosted.org/packages/f1/5f/ab6b6cec85b40789801f35b7d2fb579ae242d8193929974a106d5ff5c835/safetensors-0.4.5-cp312-none-win_amd64.whl", hash = "sha256:b5a8810ad6a6f933fff6c276eae92c1da217b39b4d8b1bc1c0b8af2d270dc532", size = 286307 }, + { url = "https://files.pythonhosted.org/packages/90/61/0e27b1403e311cba0be20026bee4ee822d90eda7dad372179e7f18bb99f3/safetensors-0.4.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:25e5f8e2e92a74f05b4ca55686234c32aac19927903792b30ee6d7bd5653d54e", size = 392062 }, + { url = "https://files.pythonhosted.org/packages/b1/9f/cc31fafc9f5d79da10a83a820ca37f069bab0717895ad8cbcacf629dd1c5/safetensors-0.4.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:81efb124b58af39fcd684254c645e35692fea81c51627259cdf6d67ff4458916", size = 382517 }, + { url = "https://files.pythonhosted.org/packages/a4/c7/4fda8a0ebb96662550433378f4a74c677fa5fc4d0a43a7ec287d1df254a9/safetensors-0.4.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:585f1703a518b437f5103aa9cf70e9bd437cb78eea9c51024329e4fb8a3e3679", size = 441378 }, + { url = "https://files.pythonhosted.org/packages/14/31/9abb431f6209de9c80dab83e1112ebd769f1e32e7ab7ab228a02424a4693/safetensors-0.4.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4b99fbf72e3faf0b2f5f16e5e3458b93b7d0a83984fe8d5364c60aa169f2da89", size = 438831 }, + { url = "https://files.pythonhosted.org/packages/37/37/99bfb195578a808b8d045159ee9264f8da58d017ac0701853dcacda14d4e/safetensors-0.4.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b17b299ca9966ca983ecda1c0791a3f07f9ca6ab5ded8ef3d283fff45f6bcd5f", size = 477112 }, + { url = "https://files.pythonhosted.org/packages/7d/05/fac3ef107e60d2a78532bed171a91669d4bb259e1236f5ea8c67a6976c75/safetensors-0.4.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:76ded72f69209c9780fdb23ea89e56d35c54ae6abcdec67ccb22af8e696e449a", size = 493373 }, + { url = "https://files.pythonhosted.org/packages/cf/7a/825800ee8c68214b4fd3506d5e19209338c69b41e01c6e14dd13969cc8b9/safetensors-0.4.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2783956926303dcfeb1de91a4d1204cd4089ab441e622e7caee0642281109db3", size = 435422 }, + { url = "https://files.pythonhosted.org/packages/5e/6c/7a3233c08bde558d6c33a41219119866cb596139a4673cc6c24024710ffd/safetensors-0.4.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d94581aab8c6b204def4d7320f07534d6ee34cd4855688004a4354e63b639a35", size = 457382 }, + { url = "https://files.pythonhosted.org/packages/a0/58/0b7bcba3788ff503990cf9278d611b56c029400612ba93e772c987b5aa03/safetensors-0.4.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:67e1e7cb8678bb1b37ac48ec0df04faf689e2f4e9e81e566b5c63d9f23748523", size = 619301 }, + { url = "https://files.pythonhosted.org/packages/82/cc/9c2cf58611daf1c83ce5d37f9de66353e23fcda36008b13fd3409a760aa3/safetensors-0.4.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:dbd280b07e6054ea68b0cb4b16ad9703e7d63cd6890f577cb98acc5354780142", size = 605580 }, + { url = "https://files.pythonhosted.org/packages/cf/ff/037ae4c0ee32db496669365e66079b6329906c6814722b159aa700e67208/safetensors-0.4.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fdadf66b5a22ceb645d5435a0be7a0292ce59648ca1d46b352f13cff3ea80410", size = 392951 }, + { url = "https://files.pythonhosted.org/packages/f1/d6/6621e16b35bf83ae099eaab07338f04991a26c9aa43879d05f19f35e149c/safetensors-0.4.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d42ffd4c2259f31832cb17ff866c111684c87bd930892a1ba53fed28370c918c", size = 383417 }, + { url = "https://files.pythonhosted.org/packages/ae/88/3068e1bb16f5e9f9068901de3cf7b3db270b9bfe6e7d51d4b55c1da0425d/safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:dd8a1f6d2063a92cd04145c7fd9e31a1c7d85fbec20113a14b487563fdbc0597", size = 442311 }, + { url = "https://files.pythonhosted.org/packages/f7/15/a2bb77ebbaa76b61ec2e9f731fe4db7f9473fd855d881957c51b3a168892/safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:951d2fcf1817f4fb0ef0b48f6696688a4e852a95922a042b3f96aaa67eedc920", size = 436678 }, + { url = "https://files.pythonhosted.org/packages/ec/79/9608c4546cdbfe3860dd7aa59e3562c9289113398b1a0bd89b68ce0a9d41/safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ac85d9a8c1af0e3132371d9f2d134695a06a96993c2e2f0bbe25debb9e3f67a", size = 457316 }, + { url = "https://files.pythonhosted.org/packages/0f/23/b17b483f2857835962ad33e38014efd4911791187e177bc23b057d35bee8/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e3cec4a29eb7fe8da0b1c7988bc3828183080439dd559f720414450de076fcab", size = 620565 }, + { url = "https://files.pythonhosted.org/packages/19/46/5d11dc300feaad285c2f1bd784ff3f689f5e0ab6be49aaf568f3a77019eb/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f", size = 606660 }, +] + +[[package]] +name = "scikit-learn" +version = "1.5.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "numpy" }, + { name = "scipy" }, + { name = "threadpoolctl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/37/59/44985a2bdc95c74e34fef3d10cb5d93ce13b0e2a7baefffe1b53853b502d/scikit_learn-1.5.2.tar.gz", hash = "sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d", size = 7001680 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/89/be41419b4bec629a4691183a5eb1796f91252a13a5ffa243fd958cad7e91/scikit_learn-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6", size = 12106070 }, + { url = "https://files.pythonhosted.org/packages/bf/e0/3b6d777d375f3b685f433c93384cdb724fb078e1dc8f8ff0950467e56c30/scikit_learn-1.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2d4cad1119c77930b235579ad0dc25e65c917e756fe80cab96aa3b9428bd3fb0", size = 10971758 }, + { url = "https://files.pythonhosted.org/packages/7b/31/eb7dd56c371640753953277de11356c46a3149bfeebb3d7dcd90b993715a/scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c412ccc2ad9bf3755915e3908e677b367ebc8d010acbb3f182814524f2e5540", size = 12500080 }, + { url = "https://files.pythonhosted.org/packages/4c/1e/a7c7357e704459c7d56a18df4a0bf08669442d1f8878cc0864beccd6306a/scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a686885a4b3818d9e62904d91b57fa757fc2bed3e465c8b177be652f4dd37c8", size = 13347241 }, + { url = "https://files.pythonhosted.org/packages/48/76/154ebda6794faf0b0f3ccb1b5cd9a19f0a63cb9e1f3d2c61b6114002677b/scikit_learn-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:c15b1ca23d7c5f33cc2cb0a0d6aaacf893792271cddff0edbd6a40e8319bc113", size = 11000477 }, + { url = "https://files.pythonhosted.org/packages/ff/91/609961972f694cb9520c4c3d201e377a26583e1eb83bc5a334c893729214/scikit_learn-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445", size = 12088580 }, + { url = "https://files.pythonhosted.org/packages/cd/7a/19fe32c810c5ceddafcfda16276d98df299c8649e24e84d4f00df4a91e01/scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1ff45e26928d3b4eb767a8f14a9a6efbf1cbff7c05d1fb0f95f211a89fd4f5de", size = 10975994 }, + { url = "https://files.pythonhosted.org/packages/4c/75/62e49f8a62bf3c60b0e64d0fce540578ee4f0e752765beb2e1dc7c6d6098/scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f763897fe92d0e903aa4847b0aec0e68cadfff77e8a0687cabd946c89d17e675", size = 12465782 }, + { url = "https://files.pythonhosted.org/packages/49/21/3723de321531c9745e40f1badafd821e029d346155b6c79704e0b7197552/scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8b0ccd4a902836493e026c03256e8b206656f91fbcc4fde28c57a5b752561f1", size = 13322034 }, + { url = "https://files.pythonhosted.org/packages/17/1c/ccdd103cfcc9435a18819856fbbe0c20b8fa60bfc3343580de4be13f0668/scikit_learn-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:6c16d84a0d45e4894832b3c4d0bf73050939e21b99b01b6fd59cbb0cf39163b6", size = 11015224 }, + { url = "https://files.pythonhosted.org/packages/a4/db/b485c1ac54ff3bd9e7e6b39d3cc6609c4c76a65f52ab0a7b22b6c3ab0e9d/scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a", size = 12110344 }, + { url = "https://files.pythonhosted.org/packages/54/1a/7deb52fa23aebb855431ad659b3c6a2e1709ece582cb3a63d66905e735fe/scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1", size = 11033502 }, + { url = "https://files.pythonhosted.org/packages/a1/32/4a7a205b14c11225609b75b28402c196e4396ac754dab6a81971b811781c/scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd", size = 12085794 }, + { url = "https://files.pythonhosted.org/packages/c6/29/044048c5e911373827c0e1d3051321b9183b2a4f8d4e2f11c08fcff83f13/scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6", size = 12945797 }, + { url = "https://files.pythonhosted.org/packages/aa/ce/c0b912f2f31aeb1b756a6ba56bcd84dd1f8a148470526a48515a3f4d48cd/scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = 
"sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1", size = 10985467 }, + { url = "https://files.pythonhosted.org/packages/a4/50/8891028437858cc510e13578fe7046574a60c2aaaa92b02d64aac5b1b412/scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5", size = 12025584 }, + { url = "https://files.pythonhosted.org/packages/d2/79/17feef8a1c14149436083bec0e61d7befb4812e272d5b20f9d79ea3e9ab1/scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908", size = 10959795 }, + { url = "https://files.pythonhosted.org/packages/b1/c8/f08313f9e2e656bd0905930ae8bf99a573ea21c34666a813b749c338202f/scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3", size = 12077302 }, + { url = "https://files.pythonhosted.org/packages/a7/48/fbfb4dc72bed0fe31fe045fb30e924909ad03f717c36694351612973b1a9/scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12", size = 13002811 }, + { url = "https://files.pythonhosted.org/packages/a5/e7/0c869f9e60d225a77af90d2aefa7a4a4c0e745b149325d1450f0f0ce5399/scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f", size = 10951354 }, +] + [[package]] name = "scipy" version = "1.14.1" @@ -4273,6 +5030,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/5aac23e57d61707f91914506902e621632de8dc56b60446459901469b9e2/selenium-4.24.0-py3-none-any.whl", hash = "sha256:42c23f60753d5415b261b236cecbd69bd4eb5271e1563915f546b443cb6b71c6", size = 9579812 }, ] +[[package]] +name = "sentence-transformers" +version = "3.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"huggingface-hub" }, + { name = "pillow" }, + { name = "scikit-learn" }, + { name = "scipy" }, + { name = "torch" }, + { name = "tqdm" }, + { name = "transformers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/de/61/708b20dedf26c460b416beb0acd5474c190dbca13e93b40858e99f17ac46/sentence_transformers-3.2.1.tar.gz", hash = "sha256:9fc38e620e5e1beba31d538a451778c9ccdbad77119d90f59f5bce49c4148e79", size = 202527 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/18/1ec591befcbdb2c97192a40fbe7c43a8b8a8b3c89b1fa101d3eeed4d79a4/sentence_transformers-3.2.1-py3-none-any.whl", hash = "sha256:c507e069eea33d15f1f2c72f74d7ea93abef298152cc235ab5af5e3a7584f738", size = 255758 }, +] + [[package]] name = "setuptools" version = "74.0.0" @@ -4540,7 +5315,7 @@ name = "sqlalchemy" version = "2.0.32" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "greenlet", marker = "(python_full_version < '3.13' and platform_machine == 'AMD64') or (python_full_version < '3.13' and platform_machine == 'WIN32') or (python_full_version < '3.13' and platform_machine == 'aarch64') or (python_full_version < '3.13' and platform_machine == 'amd64') or (python_full_version < '3.13' and platform_machine == 'ppc64le') or (python_full_version < '3.13' and platform_machine == 'win32') or (python_full_version < '3.13' and platform_machine == 'x86_64')" }, + { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/af/6f/967e987683908af816aa3072c1a6997ac9933cf38d66b0474fb03f253323/SQLAlchemy-2.0.32.tar.gz", hash = "sha256:c1b88cc8b02b6a5f0efb0345a03672d4c897dc7d92585176f88c67346f565ea8", size = 9546691 } @@ -4625,6 +5400,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/a3/cf/0fea4f4ba3fc2772ac2419278aa9f6964124d4302117d61bc055758e000c/striprtf-0.0.26-py3-none-any.whl", hash = "sha256:8c8f9d32083cdc2e8bfb149455aa1cc5a4e0a035893bedc75db8b73becb3a1bb", size = 6914 }, ] +[[package]] +name = "sympy" +version = "1.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ca/99/5a5b6f19ff9f083671ddf7b9632028436167cd3d33e11015754e41b249a4/sympy-1.13.1.tar.gz", hash = "sha256:9cebf7e04ff162015ce31c9c6c9144daa34a93bd082f54fd8f12deca4f47515f", size = 7533040 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8", size = 6189177 }, +] + [[package]] name = "tabulate" version = "0.9.0" @@ -4719,6 +5506,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/56/c0514dcfdb2b67333bf4e653ca9cf0fda51004932d3b246bf835376cbaba/textual_imageview-0.1.1-py3-none-any.whl", hash = "sha256:335c8043e2f1f735b1b2ec1753a743d6762578175cd2cedae3ce67e2694800a4", size = 8875 }, ] +[[package]] +name = "threadpoolctl" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/55/b5148dcbf72f5cde221f8bfe3b6a540da7aa1842f6b491ad979a6c8b84af/threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107", size = 41936 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467", size = 18414 }, +] + [[package]] name = "tiktoken" version = "0.7.0" @@ -4845,6 +5641,48 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", size = 12757 }, ] +[[package]] +name = "torch" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "jinja2" }, + { name = "networkx" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "setuptools", marker = "python_full_version >= '3.12'" }, + { name = "sympy" }, + { name = "triton", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" }, + { name = "typing-extensions" }, +] +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f1/82/adc3a77b9fbbcb79d398d565d39dc0e09f43fff088599d15da81e6cfaaec/torch-2.5.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:7f179373a047b947dec448243f4e6598a1c960fa3bb978a9a7eecd529fbc363f", size = 906443143 }, + { url = "https://files.pythonhosted.org/packages/64/b0/0d2056c8d379a3f7f0c9fa9adece180f64fd6c339e2007a4fffbea7ecaa0/torch-2.5.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:15fbc95e38d330e5b0ef1593b7bc0a19f30e5bdad76895a5cffa1a6a044235e9", size = 91839507 }, + { url = "https://files.pythonhosted.org/packages/60/41/073193dd2566012eaeae44d6c5e55ba6a9b1d5687a251f12e1804a9e2968/torch-2.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:f499212f1cffea5d587e5f06144630ed9aa9c399bba12ec8905798d833bd1404", size = 203108822 }, + { url = "https://files.pythonhosted.org/packages/93/d4/6e7bda4e52c37a78b5066e407baff2426fd4543356ead3419383a0bf4011/torch-2.5.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:c54db1fade17287aabbeed685d8e8ab3a56fea9dd8d46e71ced2da367f09a49f", size = 64283014 }, + { url = "https://files.pythonhosted.org/packages/75/9f/cde8b71ccca65d68a3733c5c9decef9adefcfaa692f8ab03afbb5de09daa/torch-2.5.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:499a68a756d3b30d10f7e0f6214dc3767b130b797265db3b1c02e9094e2a07be", size = 906478039 }, + { url = "https://files.pythonhosted.org/packages/58/27/5bacfb6600209bf7e77ba115656cf7aca5b6ab1e0dc95551eefac2d6e7ec/torch-2.5.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:9f3df8138a1126a851440b7d5a4869bfb7c9cc43563d64fd9d96d0465b581024", size = 91843630 }, + { url = "https://files.pythonhosted.org/packages/78/18/7a2e56e2dc45a433dea9e1bf46a65e234294c9c470ccb4d4b53025f57b23/torch-2.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:b81da3bdb58c9de29d0e1361e52f12fcf10a89673f17a11a5c6c7da1cb1a8376", size = 203117099 }, + { url = 
"https://files.pythonhosted.org/packages/47/1b/3dfcc84b383f7b27a41de3251753db077b1e23d3f89a3b294cdd2d86fb7b/torch-2.5.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ba135923295d564355326dc409b6b7f5bd6edc80f764cdaef1fb0a1b23ff2f9c", size = 64288133 }, + { url = "https://files.pythonhosted.org/packages/ac/72/d610029ef5cdde3f3aa216e8e75c233b1a91b34af0fc47392b3aa928563a/torch-2.5.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:2dd40c885a05ef7fe29356cca81be1435a893096ceb984441d6e2c27aff8c6f4", size = 906389657 }, + { url = "https://files.pythonhosted.org/packages/22/c2/d1759641eafdf59cb3a339909e96c842fc0c3579681bb7422acaf4a2c179/torch-2.5.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:bc52d603d87fe1da24439c0d5fdbbb14e0ae4874451d53f0120ffb1f6c192727", size = 91823361 }, + { url = "https://files.pythonhosted.org/packages/2b/e3/0f2698930d944087c3ef585b71a1a72aa51929877c1ccf35d625bec9bd78/torch-2.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea718746469246cc63b3353afd75698a288344adb55e29b7f814a5d3c0a7c78d", size = 203064894 }, + { url = "https://files.pythonhosted.org/packages/56/88/f1ddffd642cf71777dca43621b170d50f13175cdd0b4179e04d6e025b5fb/torch-2.5.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6de1fd253e27e7f01f05cd7c37929ae521ca23ca4620cfc7c485299941679112", size = 64261171 }, + { url = "https://files.pythonhosted.org/packages/b4/b1/f06261814df00eee07ac8cf697a6f5d79231d9894c996d5985243343518a/torch-2.5.0-cp313-cp313-manylinux1_x86_64.whl", hash = "sha256:83dcf518685db20912b71fc49cbddcc8849438cdb0e9dcc919b02a849e2cd9e8", size = 906416128 }, +] + [[package]] name = "tornado" version = "6.4.1" @@ -4884,6 +5722,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, ] +[[package]] +name = "transformers" +version = "4.45.2" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "regex" }, + { name = "requests" }, + { name = "safetensors" }, + { name = "tokenizers" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4b/4c/3862b2dd6cdf83b187897bd351da0f7fb74d0df642b03c6f5d06353a3ca0/transformers-4.45.2.tar.gz", hash = "sha256:72bc390f6b203892561f05f86bbfaa0e234aab8e927a83e62b9d92ea7e3ae101", size = 8478357 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9d/030cc1b3e88172967e22ee1d012e0d5e0384eb70d2a098d1669d549aea29/transformers-4.45.2-py3-none-any.whl", hash = "sha256:c551b33660cfc815bae1f9f097ecfd1e65be623f13c6ee0dda372bd881460210", size = 9881312 }, +] + [[package]] name = "trio" version = "0.26.2" @@ -4916,6 +5775,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/be/a9ae5f50cad5b6f85bd2574c2c923730098530096e170c1ce7452394d7aa/trio_websocket-0.11.1-py3-none-any.whl", hash = "sha256:520d046b0d030cf970b8b2b2e00c4c2245b3807853ecd44214acd33d74581638", size = 17408 }, ] +[[package]] +name = "triton" +version = "3.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/29/69aa56dc0b2eb2602b553881e34243475ea2afd9699be042316842788ff5/triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b0dd10a925263abbe9fa37dcde67a5e9b2383fc269fdf59f5657cac38c5d1d8", size = 209460013 }, + { url = "https://files.pythonhosted.org/packages/86/17/d9a5cf4fcf46291856d1e90762e36cbabd2a56c7265da0d1d9508c8e3943/triton-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0f34f6e7885d1bf0eaaf7ba875a5f0ce6f3c13ba98f9503651c1e6dc6757ed5c", size = 209506424 }, + { url = "https://files.pythonhosted.org/packages/78/eb/65f5ba83c2a123f6498a3097746607e5b2f16add29e36765305e4ac7fdd8/triton-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8182f42fd8080a7d39d666814fa36c5e30cc00ea7eeeb1a2983dbb4c99a0fdc", size = 209551444 }, +] + [[package]] name = "typer" version = "0.12.5" @@ -5078,6 +5950,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f5/8e/cdc7d6263db313030e4c257dd5ba3909ebc4e4fb53ad62d5f09b1a2f5458/uvicorn-0.30.6-py3-none-any.whl", hash = "sha256:65fd46fe3fda5bdc1b03b94eb634923ff18cd35b2f084813ea79d1f103f711b5", size = 62835 }, ] +[package.optional-dependencies] +standard = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "httptools" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, + { name = "watchfiles" }, + { name = "websockets" }, +] + +[[package]] +name = "uvloop" +version = "0.21.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/c0/854216d09d33c543f12a44b393c402e89a920b1a0a7dc634c42de91b9cf6/uvloop-0.21.0.tar.gz", hash = "sha256:3bf12b0fda68447806a7ad847bfa591613177275d35b6724b1ee573faa3704e3", size = 2492741 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/76/44a55515e8c9505aa1420aebacf4dd82552e5e15691654894e90d0bd051a/uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f", size = 1442019 }, + { url = "https://files.pythonhosted.org/packages/35/5a/62d5800358a78cc25c8a6c72ef8b10851bdb8cca22e14d9c74167b7f86da/uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d", size = 
801898 }, + { url = "https://files.pythonhosted.org/packages/f3/96/63695e0ebd7da6c741ccd4489b5947394435e198a1382349c17b1146bb97/uvloop-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f38b2e090258d051d68a5b14d1da7203a3c3677321cf32a95a6f4db4dd8b6f26", size = 3827735 }, + { url = "https://files.pythonhosted.org/packages/61/e0/f0f8ec84979068ffae132c58c79af1de9cceeb664076beea86d941af1a30/uvloop-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c43e0f13022b998eb9b973b5e97200c8b90823454d4bc06ab33829e09fb9bb", size = 3825126 }, + { url = "https://files.pythonhosted.org/packages/bf/fe/5e94a977d058a54a19df95f12f7161ab6e323ad49f4dabc28822eb2df7ea/uvloop-0.21.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10d66943def5fcb6e7b37310eb6b5639fd2ccbc38df1177262b0640c3ca68c1f", size = 3705789 }, + { url = "https://files.pythonhosted.org/packages/26/dd/c7179618e46092a77e036650c1f056041a028a35c4d76945089fcfc38af8/uvloop-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:67dd654b8ca23aed0a8e99010b4c34aca62f4b7fce88f39d452ed7622c94845c", size = 3800523 }, + { url = "https://files.pythonhosted.org/packages/57/a7/4cf0334105c1160dd6819f3297f8700fda7fc30ab4f61fbf3e725acbc7cc/uvloop-0.21.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c0f3fa6200b3108919f8bdabb9a7f87f20e7097ea3c543754cabc7d717d95cf8", size = 1447410 }, + { url = "https://files.pythonhosted.org/packages/8c/7c/1517b0bbc2dbe784b563d6ab54f2ef88c890fdad77232c98ed490aa07132/uvloop-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0878c2640cf341b269b7e128b1a5fed890adc4455513ca710d77d5e93aa6d6a0", size = 805476 }, + { url = "https://files.pythonhosted.org/packages/ee/ea/0bfae1aceb82a503f358d8d2fa126ca9dbdb2ba9c7866974faec1cb5875c/uvloop-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9fb766bb57b7388745d8bcc53a359b116b8a04c83a2288069809d2b3466c37e", size = 3960855 }, + { url = 
"https://files.pythonhosted.org/packages/8a/ca/0864176a649838b838f36d44bf31c451597ab363b60dc9e09c9630619d41/uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a375441696e2eda1c43c44ccb66e04d61ceeffcd76e4929e527b7fa401b90fb", size = 3973185 }, + { url = "https://files.pythonhosted.org/packages/30/bf/08ad29979a936d63787ba47a540de2132169f140d54aa25bc8c3df3e67f4/uvloop-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:baa0e6291d91649c6ba4ed4b2f982f9fa165b5bbd50a9e203c416a2797bab3c6", size = 3820256 }, + { url = "https://files.pythonhosted.org/packages/da/e2/5cf6ef37e3daf2f06e651aae5ea108ad30df3cb269102678b61ebf1fdf42/uvloop-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4509360fcc4c3bd2c70d87573ad472de40c13387f5fda8cb58350a1d7475e58d", size = 3937323 }, + { url = "https://files.pythonhosted.org/packages/8c/4c/03f93178830dc7ce8b4cdee1d36770d2f5ebb6f3d37d354e061eefc73545/uvloop-0.21.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:359ec2c888397b9e592a889c4d72ba3d6befba8b2bb01743f72fffbde663b59c", size = 1471284 }, + { url = "https://files.pythonhosted.org/packages/43/3e/92c03f4d05e50f09251bd8b2b2b584a2a7f8fe600008bcc4523337abe676/uvloop-0.21.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7089d2dc73179ce5ac255bdf37c236a9f914b264825fdaacaded6990a7fb4c2", size = 821349 }, + { url = "https://files.pythonhosted.org/packages/a6/ef/a02ec5da49909dbbfb1fd205a9a1ac4e88ea92dcae885e7c961847cd51e2/uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baa4dcdbd9ae0a372f2167a207cd98c9f9a1ea1188a8a526431eef2f8116cc8d", size = 4580089 }, + { url = "https://files.pythonhosted.org/packages/06/a7/b4e6a19925c900be9f98bec0a75e6e8f79bb53bdeb891916609ab3958967/uvloop-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86975dca1c773a2c9864f4c52c5a55631038e387b47eaf56210f873887b6c8dc", size = 4693770 }, + { url = 
"https://files.pythonhosted.org/packages/ce/0c/f07435a18a4b94ce6bd0677d8319cd3de61f3a9eeb1e5f8ab4e8b5edfcb3/uvloop-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:461d9ae6660fbbafedd07559c6a2e57cd553b34b0065b6550685f6653a98c1cb", size = 4451321 }, + { url = "https://files.pythonhosted.org/packages/8f/eb/f7032be105877bcf924709c97b1bf3b90255b4ec251f9340cef912559f28/uvloop-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:183aef7c8730e54c9a3ee3227464daed66e37ba13040bb3f350bc2ddc040f22f", size = 4659022 }, + { url = "https://files.pythonhosted.org/packages/3f/8d/2cbef610ca21539f0f36e2b34da49302029e7c9f09acef0b1c3b5839412b/uvloop-0.21.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bfd55dfcc2a512316e65f16e503e9e450cab148ef11df4e4e679b5e8253a5281", size = 1468123 }, + { url = "https://files.pythonhosted.org/packages/93/0d/b0038d5a469f94ed8f2b2fce2434a18396d8fbfb5da85a0a9781ebbdec14/uvloop-0.21.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:787ae31ad8a2856fc4e7c095341cccc7209bd657d0e71ad0dc2ea83c4a6fa8af", size = 819325 }, + { url = "https://files.pythonhosted.org/packages/50/94/0a687f39e78c4c1e02e3272c6b2ccdb4e0085fda3b8352fecd0410ccf915/uvloop-0.21.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ee4d4ef48036ff6e5cfffb09dd192c7a5027153948d85b8da7ff705065bacc6", size = 4582806 }, + { url = "https://files.pythonhosted.org/packages/d2/19/f5b78616566ea68edd42aacaf645adbf71fbd83fc52281fba555dc27e3f1/uvloop-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3df876acd7ec037a3d005b3ab85a7e4110422e4d9c1571d4fc89b0fc41b6816", size = 4701068 }, + { url = "https://files.pythonhosted.org/packages/47/57/66f061ee118f413cd22a656de622925097170b9380b30091b78ea0c6ea75/uvloop-0.21.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd53ecc9a0f3d87ab847503c2e1552b690362e005ab54e8a48ba97da3924c0dc", size = 4454428 }, + { url = 
"https://files.pythonhosted.org/packages/63/9a/0962b05b308494e3202d3f794a6e85abe471fe3cafdbcf95c2e8c713aabd/uvloop-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5c39f217ab3c663dc699c04cbd50c13813e31d917642d459fdcec07555cc553", size = 4660018 }, +] + [[package]] name = "watchfiles" version = "0.24.0" From 347fb6034b6b04abbe392bab4415fabd2f651edf Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Thu, 24 Oct 2024 07:20:46 +1000 Subject: [PATCH 02/12] fix autogen-core dep --- python/packages/autogen-ext/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/packages/autogen-ext/pyproject.toml b/python/packages/autogen-ext/pyproject.toml index 8d16a40c8e8d..24444d44ef59 100644 --- a/python/packages/autogen-ext/pyproject.toml +++ b/python/packages/autogen-ext/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "autogen-ext" -version = "0.4.0dev1" +version = "0.4.0dev2" license = {file = "LICENSE-CODE"} description = "AutoGen extensions library" readme = "README.md" @@ -15,7 +15,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "autogen-core==0.4.0dev1", + "autogen-core==0.4.0dev2", ] From 4b886b0d54f8a379bd16ee150db9b30435e1f68b Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Thu, 24 Oct 2024 14:30:24 +1000 Subject: [PATCH 03/12] typing fixes --- .../src/autogen_ext/storage/_base.py | 6 +- .../src/autogen_ext/storage/_chromadb.py | 253 +++++++++++++----- .../src/autogen_ext/storage/_factory.py | 6 +- .../src/autogen_ext/storage/_utils.py | 93 ------- .../tests/storage/test_chroma_db.py | 2 +- 5 files changed, 188 insertions(+), 172 deletions(-) delete mode 100644 python/packages/autogen-ext/src/autogen_ext/storage/_utils.py diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_base.py b/python/packages/autogen-ext/src/autogen_ext/storage/_base.py index 2e73c61e5155..486888691193 100644 --- 
a/python/packages/autogen-ext/src/autogen_ext/storage/_base.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/_base.py @@ -156,7 +156,7 @@ async def delete_docs(self, ids: Sequence[ItemID], collection_name: Optional[str async def retrieve_docs( self, - queries: Sequence[str], + queries: List[str], collection_name: Optional[str] = None, n_results: int = 10, distance_threshold: float = -1, @@ -183,7 +183,7 @@ async def get_docs_by_ids( self, ids: Optional[Sequence[ItemID]] = None, collection_name: Optional[str] = None, - include: Optional[Sequence[str]] = None, + include: Optional[List[str]] = None, **kwargs: Any, ) -> List[Document]: """ @@ -324,7 +324,7 @@ def delete_docs(self, ids: Sequence[ItemID], collection_name: Optional[str] = No def retrieve_docs( self, - queries: Sequence[str], + queries: List[str], collection_name: Optional[str] = None, n_results: int = 10, distance_threshold: float = -1, diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py b/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py index 618728859175..0fec8a2232e3 100644 --- a/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py @@ -1,18 +1,17 @@ -# python\packages\autogen-ext\src\autogen_ext\storage\_chromadb.py - import logging import os -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, cast from autogen_core.application.logging import TRACE_LOGGER_NAME +from chromadb import GetResult, QueryResult if TYPE_CHECKING: - from chromadb.api import AsyncClientAPI, Client + from chromadb.api import AsyncClientAPI, ClientAPI from chromadb.api.models.Collection import Collection + from chromadb.api.types import Embeddable, EmbeddingFunction, IncludeEnum from chromadb.config import Settings from ._base import AsyncVectorDB, Document, ItemID, 
QueryResults, VectorDB -from ._utils import chroma_results_to_query_results, filter_results_by_distance CHROMADB_MAX_BATCH_SIZE = int(os.environ.get("CHROMADB_MAX_BATCH_SIZE", 40000)) logger = logging.getLogger(f"{TRACE_LOGGER_NAME}.{__name__}") @@ -32,9 +31,11 @@ class ChromaVectorDB(VectorDB): def __init__( self, *, - client: Optional["Client"] = None, + client: Optional["ClientAPI"] = None, path: Optional[str] = None, - embedding_function: Optional[Callable[[List[str]], List[List[float]]]] = None, + embedding_function: Optional[ + Union[Callable[[List[str]], List[List[float]]], "EmbeddingFunction[Embeddable]"] + ] = None, metadata: Optional[Dict[str, Any]] = None, client_type: str = "persistent", host: str = "localhost", @@ -48,7 +49,7 @@ def __init__( client: chromadb.Client | The client object of the vector database. Default is None. If provided, it will use the client object directly and ignore other arguments. path: Optional[str] | The path to the vector database. Required if client_type is 'persistent'. - embedding_function: Callable | The embedding function used to generate the vector representation + embedding_function: Optional[Union[Callable, EmbeddingFunction]] | The embedding function used to generate the vector representation of the documents. Default is None, SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") will be used. metadata: dict | The metadata of the vector database. Default is None. client_type: str | The type of client to use. Can be 'persistent' or 'http'. Default is 'persistent'. 
@@ -64,8 +65,10 @@ def __init__( if chromadb.__version__ < "0.5.0": raise ImportError("Please upgrade chromadb to version 0.5.0 or later.") - import chromadb.utils.embedding_functions as ef from chromadb.errors import ChromaError + from chromadb.utils.embedding_functions.sentence_transformer_embedding_function import ( + SentenceTransformerEmbeddingFunction, + ) ChromaVectorDB.ChromaError = ChromaError # Set the class attribute except ImportError as e: @@ -73,15 +76,16 @@ def __init__( "Missing dependencies for ChromaVectorDB. Please ensure the autogen-ext package was installed with the 'chromadb' extra." ) from e - self.client: "Client" = client - self.embedding_function = ( - ef.SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") + self.embedding_function: "EmbeddingFunction[Embeddable]" = ( # type: ignore + SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") if embedding_function is None - else embedding_function + else cast("EmbeddingFunction[Embeddable]", embedding_function) ) self.metadata = metadata if metadata else {} self.type = "chroma" - if not self.client: + if client is not None: + self.client: "ClientAPI" = client + else: if client_type == "persistent": if path is None: raise ValueError("Persistent client requires a 'path' to save the database.") @@ -90,6 +94,7 @@ def __init__( self.client = chromadb.HttpClient(host=host, port=port, **kwargs) else: raise ValueError(f"Invalid client_type: {client_type}") + self.active_collection: Optional["Collection"] = None def create_collection( @@ -201,7 +206,7 @@ def _batch_insert( def insert_docs( self, - docs: List[Document], + docs: Sequence[Document], collection_name: Optional[str] = None, upsert: bool = False, **kwargs: Any, @@ -210,7 +215,7 @@ def insert_docs( Insert documents into the collection of the vector database. Args: - docs: List[Document] | A list of documents. Each document is a Pydantic Document model. + docs: Sequence[Document] | A list of documents. 
Each document is a Pydantic Document model. collection_name: Optional[str] | The name of the collection. Default is None. upsert: bool | Whether to update the document if it exists. Default is False. kwargs: Dict[str, Any] | Additional keyword arguments. @@ -222,8 +227,6 @@ def insert_docs( return if docs[0].content is None and docs[0].embedding is None: raise ValueError("Either document content or embedding is required.") - if docs[0].id is None: - raise ValueError("The document id is required.") documents = [doc.content for doc in docs] if docs[0].content else None ids = [str(doc.id) for doc in docs] collection = self.get_collection(collection_name) @@ -235,8 +238,8 @@ def insert_docs( collection, embeddings=embeddings, ids=ids, - metadatas=metadatas, - documents=documents, + metadatas=metadatas, # type: ignore + documents=documents, # type: ignore upsert=upsert, ) @@ -245,7 +248,7 @@ def update_docs(self, docs: Sequence[Document], collection_name: Optional[str] = Update documents in the collection of the vector database. Args: - docs: List[Document] | A list of documents. + docs: Sequence[Document] | A list of documents. collection_name: Optional[str] | The name of the collection. Default is None. kwargs: Dict[str, Any] | Additional keyword arguments. @@ -259,7 +262,7 @@ def delete_docs(self, ids: Sequence[ItemID], collection_name: Optional[str] = No Delete documents from the collection of the vector database. Args: - ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`. + ids: Sequence[ItemID] | A list of document ids. Each id is a typed `ItemID`. collection_name: Optional[str] | The name of the collection. Default is None. kwargs: Dict[str, Any] | Additional keyword arguments. 
@@ -267,7 +270,7 @@ def delete_docs(self, ids: Sequence[ItemID], collection_name: Optional[str] = No None """ collection = self.get_collection(collection_name) - collection.delete(ids=ids) + collection.delete(ids=[str(id_) for id_ in ids] if ids else None) def retrieve_docs( self, @@ -299,35 +302,49 @@ def retrieve_docs( query_texts=queries, n_results=n_results, ) - results["contents"] = results.pop("documents") - results = chroma_results_to_query_results(results) - results = filter_results_by_distance(results, distance_threshold) - return results + results_list = _chroma_results_to_query_results(results) + results_filtered = filter_results_by_distance(results_list, distance_threshold) + return results_filtered @staticmethod - def _chroma_get_results_to_list_documents(data_dict: Dict[str, Any]) -> List[Document]: - """Converts a dictionary with list values to a list of Document. + def _chroma_get_results_to_list_documents(data_dict: GetResult) -> List[Document]: + """Converts a GetResult dictionary to a list of Document objects. Args: - data_dict: A dictionary where keys map to lists or None. + data_dict: GetResult | A GetResult dictionary containing ids, embeddings, documents, metadatas etc. Returns: - List[Document] | The list of Document. + List[Document] | The list of Document objects. 
""" results: List[Document] = [] - keys = [key for key in data_dict if data_dict[key] is not None] - for i in range(len(data_dict[keys[0]])): + # Get the length from ids which is always present in GetResult + n_docs = len(data_dict["ids"]) + + for i in range(n_docs): doc_dict = {} - for key in data_dict.keys(): - if data_dict[key] is not None and len(data_dict[key]) > i: - doc_dict[key[:-1]] = data_dict[key][i] + + # Process each possible field from GetResult + if data_dict["ids"]: + doc_dict["id"] = data_dict["ids"][i] + if data_dict["embeddings"] is not None: + doc_dict["embedding"] = data_dict["embeddings"][i] + if data_dict["documents"] is not None: + doc_dict["document"] = data_dict["documents"][i] + if data_dict["metadatas"] is not None: + doc_dict["metadata"] = data_dict["metadatas"][i] + if data_dict["uris"] is not None: + doc_dict["uri"] = data_dict["uris"][i] + if data_dict["data"] is not None: + doc_dict["data"] = data_dict["data"][i] + results.append(Document(**doc_dict)) # type: ignore + return results def get_docs_by_ids( self, - ids: Optional[List[ItemID]] = None, + ids: Optional[Sequence[ItemID]] = None, collection_name: Optional[str] = None, include: Optional[List[str]] = None, **kwargs: Any, @@ -336,19 +353,22 @@ def get_docs_by_ids( Retrieve documents from the collection of the vector database based on the ids. Args: - ids: Optional[List[ItemID]] | A list of document ids. If None, will return all the documents. Default is None. + ids: Optional[Sequence[ItemID]] | A list of document ids. If None, will return all the documents. Default is None. collection_name: Optional[str] | The name of the collection. Default is None. - include: Optional[List[str]] | The fields to include. Default is None. - If None, will include ["metadatas", "documents"]. IDs are always included. + include: Optional[List[IncludeEnum]] | The fields to include. Default is None. + If None, will include [IncludeEnum.metadatas, IncludeEnum.documents]. IDs are always included. 
kwargs: Dict[str, Any] | Additional keyword arguments. Returns: List[Document] | The results. """ + if include is not None: + include_enums = [IncludeEnum(item) for item in include] + else: + include_enums = [IncludeEnum.metadatas, IncludeEnum.documents] collection = self.get_collection(collection_name) - if include is None: - include = ["metadatas", "documents"] - results = collection.get(ids=ids, include=include) + + results = collection.get(ids=[str(id_) for id_ in ids] if ids else None, include=include_enums) results = self._chroma_get_results_to_list_documents(results) return results @@ -367,8 +387,10 @@ class AsyncChromaVectorDB(AsyncVectorDB): def __init__( self, *, - client: "AsyncClientAPI", - embedding_function: Optional[Callable[[List[str]], List[List[float]]]] = None, + client: Optional["AsyncClientAPI"] = None, + embedding_function: Optional[ + Union[Callable[[List[str]], List[List[float]]], "EmbeddingFunction[Embeddable]"] + ] = None, host: str = "localhost", port: int = 8000, ssl: bool = False, @@ -404,6 +426,9 @@ def __init__( if chromadb.__version__ < "0.5.0": raise ImportError("Please upgrade chromadb to version 0.5.0 or later.") from chromadb.errors import ChromaError + from chromadb.utils.embedding_functions.sentence_transformer_embedding_function import ( + SentenceTransformerEmbeddingFunction, + ) AsyncChromaVectorDB.ChromaError = ChromaError # Set the class attribute except ImportError as e: @@ -411,13 +436,17 @@ def __init__( "Missing dependencies for AsyncChromaVectorDB. Please ensure the autogen-ext package was installed with the 'chromadb' extra." 
) from e - self.client: "AsyncClientAPI" = client - self.embedding_function = embedding_function - if self.embedding_function is None: - raise ValueError("An embedding function must be provided for AsyncChromaVectorDB.") + self.embedding_function: "EmbeddingFunction[Embeddable]" = ( # type: ignore + cast( + "EmbeddingFunction[Embeddable]", + embedding_function or SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2"), + ) + ) self.type = "chroma" - if not self.client: - self.client = chromadb.AsyncHttpClient( + if client is not None: + self.client: "AsyncClientAPI" = client + else: + self.client = chromadb.AsyncHttpClient( # type: ignore host=host, port=port, ssl=ssl, @@ -427,7 +456,7 @@ def __init__( database=database, **kwargs, ) - self.active_collection: Optional["Collection"] = None + self.active_collection: Optional[Any] = None async def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> Any: """ @@ -536,7 +565,7 @@ async def _batch_insert( async def insert_docs( self, - docs: List[Document], + docs: Sequence[Document], collection_name: Optional[str] = None, upsert: bool = False, **kwargs: Any, @@ -545,7 +574,7 @@ async def insert_docs( Insert documents into the collection of the vector database. Args: - docs: List[Document] | A list of documents. Each document is a Pydantic Document model. + docs: Sequence[Document] | A list of documents. Each document is a Pydantic Document model. collection_name: Optional[str] | The name of the collection. Default is None. upsert: bool | Whether to update the document if it exists. Default is False. kwargs: Dict[str, Any] | Additional keyword arguments. 
@@ -557,8 +586,6 @@ async def insert_docs( return if docs[0].content is None and docs[0].embedding is None: raise ValueError("Either document content or embedding is required.") - if docs[0].id is None: - raise ValueError("The document id is required.") documents = [doc.content for doc in docs] if docs[0].content else None ids = [str(doc.id) for doc in docs] collection = await self.get_collection(collection_name) @@ -575,12 +602,12 @@ async def insert_docs( upsert=upsert, ) - async def update_docs(self, docs: List[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: + async def update_docs(self, docs: Sequence[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: """ Update documents in the collection of the vector database. Args: - docs: List[Document] | A list of documents. + docs: Sequence[Document] | A list of documents. collection_name: Optional[str] | The name of the collection. Default is None. kwargs: Dict[str, Any] | Additional keyword arguments. @@ -589,12 +616,12 @@ async def update_docs(self, docs: List[Document], collection_name: Optional[str] """ await self.insert_docs(docs, collection_name=collection_name, upsert=True, **kwargs) - async def delete_docs(self, ids: List[ItemID], collection_name: Optional[str] = None, **kwargs: Any) -> None: + async def delete_docs(self, ids: Sequence[ItemID], collection_name: Optional[str] = None, **kwargs: Any) -> None: """ Delete documents from the collection of the vector database. Args: - ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`. + ids: Sequence[ItemID] | A list of document ids. Each id is a typed `ItemID`. collection_name: Optional[str] | The name of the collection. Default is None. kwargs: Dict[str, Any] | Additional keyword arguments. 
@@ -634,10 +661,9 @@ async def retrieve_docs( query_texts=queries, n_results=n_results, ) - results["contents"] = results.pop("documents") - results = chroma_results_to_query_results(results) - results = filter_results_by_distance(results, distance_threshold) - return results + results_list = _chroma_results_to_query_results(results) + results_filtered = filter_results_by_distance(results_list, distance_threshold) + return results_filtered @staticmethod def _chroma_get_results_to_list_documents(data_dict: Dict[str, Any]) -> List[Document]: @@ -649,7 +675,7 @@ def _chroma_get_results_to_list_documents(data_dict: Dict[str, Any]) -> List[Doc Returns: List[Document] | The list of Document. """ - results = [] + results: List[Document] = [] keys = [key for key in data_dict if data_dict[key] is not None] for i in range(len(data_dict[keys[0]])): @@ -662,7 +688,7 @@ def _chroma_get_results_to_list_documents(data_dict: Dict[str, Any]) -> List[Doc async def get_docs_by_ids( self, - ids: Optional[List[ItemID]] = None, + ids: Optional[Sequence[ItemID]] = None, collection_name: Optional[str] = None, include: Optional[List[str]] = None, **kwargs: Any, @@ -671,18 +697,101 @@ async def get_docs_by_ids( Retrieve documents from the collection of the vector database based on the ids. Args: - ids: Optional[List[ItemID]] | A list of document ids. If None, will return all the documents. Default is None. + ids: Optional[Sequence[ItemID]] | A list of document ids. If None, will return all the documents. Default is None. collection_name: Optional[str] | The name of the collection. Default is None. - include: Optional[List[str]] | The fields to include. Default is None. - If None, will include ["metadatas", "documents"]. IDs are always included. + include: Optional[Sequence[IncludeEnum]] | The fields to include. Default is None. + If None, will include [IncludeEnum.metadatas, IncludeEnum.documents]. IDs are always included. kwargs: Dict[str, Any] | Additional keyword arguments. 
Returns: List[Document] | The results. """ collection = await self.get_collection(collection_name) - if include is None: - include = ["metadatas", "documents"] - results = await collection.get(ids=ids, include=include) + if include is not None: + include_enums = [IncludeEnum(item) for item in include] + else: + include_enums = None + results = await collection.get(ids=ids, include=include_enums) results = self._chroma_get_results_to_list_documents(results) return results + + +def _chroma_results_to_query_results( + data_dict: QueryResult, special_key: str = "distances" +) -> List[List[Tuple[Dict[str, Any], float]]]: + """Converts a dictionary with list-of-list values to a list of tuples. + + Args: + data_dict: A dictionary where keys map to lists of lists or None. + special_key: str | The key in the dictionary containing the special values + for each tuple. + + Returns: + List[List[Tuple[Dict[str, Any], float]]] | A list of tuples, where each tuple contains + a sub-dictionary with some keys from the original dictionary and the value from the + special_key. 
+ + Example: + data_dict = { + "key1s": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + "key2s": [["a", "b", "c"], ["c", "d", "e"], ["e", "f", "g"]], + "key3s": None, + "key4s": [["x", "y", "z"], ["1", "2", "3"], ["4", "5", "6"]], + "distances": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], + } + + results = [ + [ + ({"key1": 1, "key2": "a", "key4": "x"}, 0.1), + ({"key1": 2, "key2": "b", "key4": "y"}, 0.2), + ({"key1": 3, "key2": "c", "key4": "z"}, 0.3), + ], + [ + ({"key1": 4, "key2": "c", "key4": "1"}, 0.4), + ({"key1": 5, "key2": "d", "key4": "2"}, 0.5), + ({"key1": 6, "key2": "e", "key4": "3"}, 0.6), + ], + [ + ({"key1": 7, "key2": "e", "key4": "4"}, 0.7), + ({"key1": 8, "key2": "f", "key4": "5"}, 0.8), + ({"key1": 9, "key2": "g", "key4": "6"}, 0.9), + ], + ] + """ + + if not data_dict or special_key not in data_dict or not data_dict.get(special_key): + return [] + + result: List[List[Tuple[Document, float]]] = [] + data_special_key: Any = data_dict[special_key] + + if data_special_key is None: + return result + + for i in range(len(data_special_key)): + sub_result: List[Tuple[Document, float]] = [] + for j, distance in enumerate(data_special_key[i]): # type: ignore + document = data_dict["documents"][i][j] # type: ignore + sub_result.append((document, distance)) + result.append(sub_result) + + return result + + +def filter_results_by_distance( + results: List[List[Tuple[Dict[str, Any], float]]], distance_threshold: float = -1 +) -> QueryResults: + """Filters results based on a distance threshold. + + Args: + results: QueryResults | The query results. List[List[Tuple[Document, float]]] + distance_threshold: The maximum distance allowed for results. + + Returns: + QueryResults | A filtered results containing only distances smaller than the threshold. 
+ """ + + if distance_threshold > 0: + results = [[(key, value) for key, value in data if value < distance_threshold] for data in results] + + return results diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_factory.py b/python/packages/autogen-ext/src/autogen_ext/storage/_factory.py index ba9f2d047494..ab77a7091ab7 100644 --- a/python/packages/autogen-ext/src/autogen_ext/storage/_factory.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/_factory.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Any, Literal from ._base import VectorDB @@ -11,7 +11,7 @@ class VectorDBFactory: PREDEFINED_VECTOR_DB = ["chromadb"] @staticmethod - def create_vector_db(db_type: Literal["chromadb"], **kwargs) -> VectorDB: + def create_vector_db(db_type: Literal["chromadb"], **kwargs: Any) -> VectorDB: """ Create a vector database. @@ -25,7 +25,7 @@ def create_vector_db(db_type: Literal["chromadb"], **kwargs) -> VectorDB: if db_type.lower() == "chromadb": from ._chromadb import ChromaVectorDB - return ChromaVectorDB(**kwargs) + return ChromaVectorDB(**kwargs) # type: ignore else: raise ValueError( diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_utils.py b/python/packages/autogen-ext/src/autogen_ext/storage/_utils.py deleted file mode 100644 index fe4c08e9edf6..000000000000 --- a/python/packages/autogen-ext/src/autogen_ext/storage/_utils.py +++ /dev/null @@ -1,93 +0,0 @@ -from typing import Any, Dict, List, Optional, Tuple - -from ._base import QueryResults - - -def filter_results_by_distance(results: QueryResults, distance_threshold: float = -1) -> QueryResults: - """Filters results based on a distance threshold. - - Args: - results: QueryResults | The query results. List[List[Tuple[Document, float]]] - distance_threshold: The maximum distance allowed for results. - - Returns: - QueryResults | A filtered results containing only distances smaller than the threshold. 
- """ - - if distance_threshold > 0: - results = [[(key, value) for key, value in data if value < distance_threshold] for data in results] - - return results - - -def chroma_results_to_query_results( - data_dict: Dict[str, Optional[List[List[Any]]]], special_key: str = "distances" -) -> List[List[Tuple[Dict[str, Any], float]]]: - """Converts a dictionary with list-of-list values to a list of tuples. - - Args: - data_dict: A dictionary where keys map to lists of lists or None. - special_key: str | The key in the dictionary containing the special values - for each tuple. - - Returns: - List[List[Tuple[Dict[str, Any], float]]] | A list of tuples, where each tuple contains - a sub-dictionary with some keys from the original dictionary and the value from the - special_key. - - Example: - data_dict = { - "key1s": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - "key2s": [["a", "b", "c"], ["c", "d", "e"], ["e", "f", "g"]], - "key3s": None, - "key4s": [["x", "y", "z"], ["1", "2", "3"], ["4", "5", "6"]], - "distances": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], - } - - results = [ - [ - ({"key1": 1, "key2": "a", "key4": "x"}, 0.1), - ({"key1": 2, "key2": "b", "key4": "y"}, 0.2), - ({"key1": 3, "key2": "c", "key4": "z"}, 0.3), - ], - [ - ({"key1": 4, "key2": "c", "key4": "1"}, 0.4), - ({"key1": 5, "key2": "d", "key4": "2"}, 0.5), - ({"key1": 6, "key2": "e", "key4": "3"}, 0.6), - ], - [ - ({"key1": 7, "key2": "e", "key4": "4"}, 0.7), - ({"key1": 8, "key2": "f", "key4": "5"}, 0.8), - ({"key1": 9, "key2": "g", "key4": "6"}, 0.9), - ], - ] - """ - - if not data_dict or special_key not in data_dict or not data_dict[special_key]: - return [] - - keys: List[str] = [ - key - for key in data_dict - if key != special_key - and data_dict[key] is not None - and isinstance(data_dict[key], list) - and len(data_dict[key]) > 0 - and isinstance(data_dict[key][0], list) - ] - result: List[List[Tuple[Dict[str, Any], float]]] = [] - data_special_key = data_dict[special_key] - - assert 
data_special_key is not None - - for i in range(len(data_special_key)): - sub_result: List[Tuple[Dict[str, Any], float]] = [] - for j, distance in enumerate(data_special_key[i]): - sub_dict: Dict[str, Any] = {} - for key in keys: - if len(data_dict[key]) > i and len(data_dict[key][i]) > j: - sub_dict[key[:-1]] = data_dict[key][i][j] # remove 's' at the end from key - sub_result.append((sub_dict, distance)) - result.append(sub_result) - - return result diff --git a/python/packages/autogen-ext/tests/storage/test_chroma_db.py b/python/packages/autogen-ext/tests/storage/test_chroma_db.py index 5be4e06a014f..a47e6a7edbb9 100644 --- a/python/packages/autogen-ext/tests/storage/test_chroma_db.py +++ b/python/packages/autogen-ext/tests/storage/test_chroma_db.py @@ -1,6 +1,6 @@ -from autogen_ext.storage._base import Document import pytest from autogen_ext.storage import ChromaVectorDB +from autogen_ext.storage._base import Document from chromadb.errors import ChromaError From b9b72d6c3ab49d6fb5041e3b06312bee8c3a0464 Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Thu, 24 Oct 2024 14:33:51 +1000 Subject: [PATCH 04/12] fix pyproject version --- python/packages/autogen-ext/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/packages/autogen-ext/pyproject.toml b/python/packages/autogen-ext/pyproject.toml index 24444d44ef59..5bb3da44856b 100644 --- a/python/packages/autogen-ext/pyproject.toml +++ b/python/packages/autogen-ext/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "autogen-ext" -version = "0.4.0dev2" +version = "0.4.0.dev2" license = {file = "LICENSE-CODE"} description = "AutoGen extensions library" readme = "README.md" @@ -15,7 +15,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "autogen-core==0.4.0dev2", + "autogen-core==0.4.0.dev2", ] From ae4d8aeab46d4ae1be2142c3589d2dd8619e5b00 Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Thu, 24 Oct 2024 
16:47:58 +1000 Subject: [PATCH 05/12] update shared functions --- .../src/autogen_ext/storage/_base.py | 16 +- .../src/autogen_ext/storage/_chromadb.py | 215 ++++++++---------- .../tests/storage/test_chroma_db.py | 15 -- 3 files changed, 104 insertions(+), 142 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_base.py b/python/packages/autogen-ext/src/autogen_ext/storage/_base.py index 486888691193..164c8e1320e6 100644 --- a/python/packages/autogen-ext/src/autogen_ext/storage/_base.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/_base.py @@ -107,7 +107,7 @@ async def delete_collection(self, collection_name: str) -> Any: async def insert_docs( self, - docs: Sequence[Document], + docs: List[Document], collection_name: Optional[str] = None, upsert: bool = False, **kwargs: Any, @@ -126,7 +126,7 @@ async def insert_docs( """ ... - async def update_docs(self, docs: Sequence[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: + async def update_docs(self, docs: List[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: """ Update documents in the collection of the vector database. @@ -140,7 +140,7 @@ async def update_docs(self, docs: Sequence[Document], collection_name: Optional[ """ ... - async def delete_docs(self, ids: Sequence[ItemID], collection_name: Optional[str] = None, **kwargs: Any) -> None: + async def delete_docs(self, ids: List[ItemID], collection_name: Optional[str] = None, **kwargs: Any) -> None: """ Delete documents from the collection of the vector database. 
@@ -181,7 +181,7 @@ async def retrieve_docs( async def get_docs_by_ids( self, - ids: Optional[Sequence[ItemID]] = None, + ids: Optional[List[ItemID]] = None, collection_name: Optional[str] = None, include: Optional[List[str]] = None, **kwargs: Any, @@ -275,7 +275,7 @@ def delete_collection(self, collection_name: str) -> Any: def insert_docs( self, - docs: Sequence[Document], + docs: List[Document], collection_name: Optional[str] = None, upsert: bool = False, **kwargs: Any, @@ -294,7 +294,7 @@ def insert_docs( """ ... - def update_docs(self, docs: Sequence[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: + def update_docs(self, docs: List[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: """ Update documents in the collection of the vector database. @@ -308,7 +308,7 @@ def update_docs(self, docs: Sequence[Document], collection_name: Optional[str] = """ ... - def delete_docs(self, ids: Sequence[ItemID], collection_name: Optional[str] = None, **kwargs: Any) -> None: + def delete_docs(self, ids: List[ItemID], collection_name: Optional[str] = None, **kwargs: Any) -> None: """ Delete documents from the collection of the vector database. 
@@ -349,7 +349,7 @@ def retrieve_docs( def get_docs_by_ids( self, - ids: Optional[Sequence[ItemID]] = None, + ids: Optional[List[ItemID]] = None, collection_name: Optional[str] = None, include: Optional[List[str]] = None, **kwargs: Any, diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py b/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py index 0fec8a2232e3..46f2008f201d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py @@ -3,15 +3,16 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, cast from autogen_core.application.logging import TRACE_LOGGER_NAME -from chromadb import GetResult, QueryResult +from chromadb import GetResult +from chromadb import QueryResult as ChromaQueryResult if TYPE_CHECKING: from chromadb.api import AsyncClientAPI, ClientAPI from chromadb.api.models.Collection import Collection - from chromadb.api.types import Embeddable, EmbeddingFunction, IncludeEnum + from chromadb.api.types import Embeddable, EmbeddingFunction from chromadb.config import Settings -from ._base import AsyncVectorDB, Document, ItemID, QueryResults, VectorDB +from ._base import AsyncVectorDB, Document, ItemID, Metadata, QueryResults, Vector, VectorDB CHROMADB_MAX_BATCH_SIZE = int(os.environ.get("CHROMADB_MAX_BATCH_SIZE", 40000)) logger = logging.getLogger(f"{TRACE_LOGGER_NAME}.{__name__}") @@ -65,6 +66,7 @@ def __init__( if chromadb.__version__ < "0.5.0": raise ImportError("Please upgrade chromadb to version 0.5.0 or later.") + from chromadb.api.types import IncludeEnum from chromadb.errors import ChromaError from chromadb.utils.embedding_functions.sentence_transformer_embedding_function import ( SentenceTransformerEmbeddingFunction, @@ -76,12 +78,13 @@ def __init__( "Missing dependencies for ChromaVectorDB. Please ensure the autogen-ext package was installed with the 'chromadb' extra." 
) from e + self.IncludeEnum = IncludeEnum self.embedding_function: "EmbeddingFunction[Embeddable]" = ( # type: ignore SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") if embedding_function is None else cast("EmbeddingFunction[Embeddable]", embedding_function) ) - self.metadata = metadata if metadata else {} + self.metadata = metadata self.type = "chroma" if client is not None: self.client: "ClientAPI" = client @@ -124,6 +127,7 @@ def create_collection( ) except (ValueError, ChromaVectorDB.ChromaError): collection = None + if collection is None: return self.client.create_collection( name=collection_name, @@ -206,7 +210,7 @@ def _batch_insert( def insert_docs( self, - docs: Sequence[Document], + docs: List[Document], collection_name: Optional[str] = None, upsert: bool = False, **kwargs: Any, @@ -215,7 +219,7 @@ def insert_docs( Insert documents into the collection of the vector database. Args: - docs: Sequence[Document] | A list of documents. Each document is a Pydantic Document model. + docs: List[Document] | A list of documents. Each document is a Pydantic Document model. collection_name: Optional[str] | The name of the collection. Default is None. upsert: bool | Whether to update the document if it exists. Default is False. kwargs: Dict[str, Any] | Additional keyword arguments. @@ -243,12 +247,12 @@ def insert_docs( upsert=upsert, ) - def update_docs(self, docs: Sequence[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: + def update_docs(self, docs: List[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: """ Update documents in the collection of the vector database. Args: - docs: Sequence[Document] | A list of documents. + docs: List[Document] | A list of documents. collection_name: Optional[str] | The name of the collection. Default is None. kwargs: Dict[str, Any] | Additional keyword arguments. 
@@ -303,45 +307,9 @@ def retrieve_docs( n_results=n_results, ) results_list = _chroma_results_to_query_results(results) - results_filtered = filter_results_by_distance(results_list, distance_threshold) + results_filtered = _filter_results_by_distance(results_list, distance_threshold) return results_filtered - @staticmethod - def _chroma_get_results_to_list_documents(data_dict: GetResult) -> List[Document]: - """Converts a GetResult dictionary to a list of Document objects. - - Args: - data_dict: GetResult | A GetResult dictionary containing ids, embeddings, documents, metadatas etc. - - Returns: - List[Document] | The list of Document objects. - """ - results: List[Document] = [] - - # Get the length from ids which is always present in GetResult - n_docs = len(data_dict["ids"]) - - for i in range(n_docs): - doc_dict = {} - - # Process each possible field from GetResult - if data_dict["ids"]: - doc_dict["id"] = data_dict["ids"][i] - if data_dict["embeddings"] is not None: - doc_dict["embedding"] = data_dict["embeddings"][i] - if data_dict["documents"] is not None: - doc_dict["document"] = data_dict["documents"][i] - if data_dict["metadatas"] is not None: - doc_dict["metadata"] = data_dict["metadatas"][i] - if data_dict["uris"] is not None: - doc_dict["uri"] = data_dict["uris"][i] - if data_dict["data"] is not None: - doc_dict["data"] = data_dict["data"][i] - - results.append(Document(**doc_dict)) # type: ignore - - return results - def get_docs_by_ids( self, ids: Optional[Sequence[ItemID]] = None, @@ -363,13 +331,13 @@ def get_docs_by_ids( List[Document] | The results. 
""" if include is not None: - include_enums = [IncludeEnum(item) for item in include] + include_enums = [self.IncludeEnum(item) for item in include] else: - include_enums = [IncludeEnum.metadatas, IncludeEnum.documents] + include_enums = [self.IncludeEnum.metadatas, self.IncludeEnum.documents] collection = self.get_collection(collection_name) results = collection.get(ids=[str(id_) for id_ in ids] if ids else None, include=include_enums) - results = self._chroma_get_results_to_list_documents(results) + results = _chroma_get_results_to_list_documents(results) return results @@ -425,6 +393,7 @@ def __init__( if chromadb.__version__ < "0.5.0": raise ImportError("Please upgrade chromadb to version 0.5.0 or later.") + from chromadb.api.types import IncludeEnum from chromadb.errors import ChromaError from chromadb.utils.embedding_functions.sentence_transformer_embedding_function import ( SentenceTransformerEmbeddingFunction, @@ -436,6 +405,7 @@ def __init__( "Missing dependencies for AsyncChromaVectorDB. Please ensure the autogen-ext package was installed with the 'chromadb' extra." ) from e + self.IncludeEnum = IncludeEnum self.embedding_function: "EmbeddingFunction[Embeddable]" = ( # type: ignore cast( "EmbeddingFunction[Embeddable]", @@ -565,7 +535,7 @@ async def _batch_insert( async def insert_docs( self, - docs: Sequence[Document], + docs: List[Document], collection_name: Optional[str] = None, upsert: bool = False, **kwargs: Any, @@ -574,7 +544,7 @@ async def insert_docs( Insert documents into the collection of the vector database. Args: - docs: Sequence[Document] | A list of documents. Each document is a Pydantic Document model. + docs: List[Document] | A list of documents. Each document is a Pydantic Document model. collection_name: Optional[str] | The name of the collection. Default is None. upsert: bool | Whether to update the document if it exists. Default is False. kwargs: Dict[str, Any] | Additional keyword arguments. 
@@ -597,17 +567,17 @@ async def insert_docs( collection, embeddings=embeddings, ids=ids, - metadatas=metadatas, - documents=documents, + metadatas=metadatas, # type: ignore + documents=documents, # type: ignore upsert=upsert, ) - async def update_docs(self, docs: Sequence[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: + async def update_docs(self, docs: List[Document], collection_name: Optional[str] = None, **kwargs: Any) -> None: """ Update documents in the collection of the vector database. Args: - docs: Sequence[Document] | A list of documents. + docs: List[Document] | A list of documents. collection_name: Optional[str] | The name of the collection. Default is None. kwargs: Dict[str, Any] | Additional keyword arguments. @@ -655,6 +625,7 @@ async def retrieve_docs( the distance. """ collection = await self.get_collection(collection_name) + if isinstance(queries, str): queries = [queries] results = await collection.query( @@ -662,30 +633,9 @@ async def retrieve_docs( n_results=n_results, ) results_list = _chroma_results_to_query_results(results) - results_filtered = filter_results_by_distance(results_list, distance_threshold) + results_filtered = _filter_results_by_distance(results_list, distance_threshold) return results_filtered - @staticmethod - def _chroma_get_results_to_list_documents(data_dict: Dict[str, Any]) -> List[Document]: - """Converts a dictionary with list values to a list of Document. - - Args: - data_dict: A dictionary where keys map to lists or None. - - Returns: - List[Document] | The list of Document. 
- """ - results: List[Document] = [] - keys = [key for key in data_dict if data_dict[key] is not None] - - for i in range(len(data_dict[keys[0]])): - doc_dict = {} - for key in data_dict.keys(): - if data_dict[key] is not None and len(data_dict[key]) > i: - doc_dict[key[:-1]] = data_dict[key][i] - results.append(Document(**doc_dict)) # type: ignore - return results - async def get_docs_by_ids( self, ids: Optional[Sequence[ItemID]] = None, @@ -708,55 +658,45 @@ async def get_docs_by_ids( """ collection = await self.get_collection(collection_name) if include is not None: - include_enums = [IncludeEnum(item) for item in include] + include_enums = [self.IncludeEnum(item) for item in include] else: - include_enums = None - results = await collection.get(ids=ids, include=include_enums) - results = self._chroma_get_results_to_list_documents(results) - return results + include_enums = [self.IncludeEnum.metadatas, self.IncludeEnum.documents] + results: GetResult = await collection.get(ids=ids, include=include_enums) + results_list = _chroma_get_results_to_list_documents(results) + return results_list -def _chroma_results_to_query_results( - data_dict: QueryResult, special_key: str = "distances" -) -> List[List[Tuple[Dict[str, Any], float]]]: - """Converts a dictionary with list-of-list values to a list of tuples. +def _chroma_results_to_query_results(data_dict: ChromaQueryResult, special_key: str = "distances") -> QueryResults: + """Converts a ChromaDB query result into a list of lists of (Document, float) tuples. Args: - data_dict: A dictionary where keys map to lists of lists or None. - special_key: str | The key in the dictionary containing the special values - for each tuple. + data_dict: A dictionary containing the results of a ChromaDB query. + special_key: The key in the dictionary containing the float values for each tuple (default is "distances"). 
Returns: - List[List[Tuple[Dict[str, Any], float]]] | A list of tuples, where each tuple contains - a sub-dictionary with some keys from the original dictionary and the value from the - special_key. + A list of lists, where each sublist corresponds to a query and contains tuples of (Document, float). Example: data_dict = { - "key1s": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - "key2s": [["a", "b", "c"], ["c", "d", "e"], ["e", "f", "g"]], - "key3s": None, - "key4s": [["x", "y", "z"], ["1", "2", "3"], ["4", "5", "6"]], - "distances": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], + 'ids': [['1', '2'], ['3', '4']], + 'documents': [['doc1', 'doc2'], ['doc3', 'doc4']], + 'metadatas': [[{'meta': 'data1'}, {'meta': 'data2'}], [{'meta': 'data3'}, {'meta': 'data4'}]], + 'distances': [[0.1, 0.2], [0.3, 0.4]], } - results = [ - [ - ({"key1": 1, "key2": "a", "key4": "x"}, 0.1), - ({"key1": 2, "key2": "b", "key4": "y"}, 0.2), - ({"key1": 3, "key2": "c", "key4": "z"}, 0.3), - ], - [ - ({"key1": 4, "key2": "c", "key4": "1"}, 0.4), - ({"key1": 5, "key2": "d", "key4": "2"}, 0.5), - ({"key1": 6, "key2": "e", "key4": "3"}, 0.6), - ], - [ - ({"key1": 7, "key2": "e", "key4": "4"}, 0.7), - ({"key1": 8, "key2": "f", "key4": "5"}, 0.8), - ({"key1": 9, "key2": "g", "key4": "6"}, 0.9), - ], - ] + results = _chroma_results_to_query_results(data_dict) + + # results will be: + # [ + # [ + # (Document(id='1', content='doc1', metadata={'meta': 'data1'}, embedding=None), 0.1), + # (Document(id='2', content='doc2', metadata={'meta': 'data2'}, embedding=None), 0.2), + # ], + # [ + # (Document(id='3', content='doc3', metadata={'meta': 'data3'}, embedding=None), 0.3), + # (Document(id='4', content='doc4', metadata={'meta': 'data4'}, embedding=None), 0.4), + # ], + # ] """ if not data_dict or special_key not in data_dict or not data_dict.get(special_key): @@ -770,17 +710,25 @@ def _chroma_results_to_query_results( for i in range(len(data_special_key)): sub_result: List[Tuple[Document, float]] = [] - 
for j, distance in enumerate(data_special_key[i]): # type: ignore - document = data_dict["documents"][i][j] # type: ignore - sub_result.append((document, distance)) + ids = data_dict["ids"][i] + documents = data_dict.get("documents") or [None] * len(ids) + metadatas = data_dict.get("metadatas") or [None] * len(ids) + embeddings = data_dict.get("embeddings") or [None] * len(ids) + for j in range(len(data_special_key[i])): + document = Document( + id=ids[j], + content=cast(str, documents[j]), + metadata=cast(Metadata, metadatas[j]), + embedding=cast(Vector, embeddings[j]), + ) + value = data_special_key[i][j] + sub_result.append((document, value)) result.append(sub_result) return result -def filter_results_by_distance( - results: List[List[Tuple[Dict[str, Any], float]]], distance_threshold: float = -1 -) -> QueryResults: +def _filter_results_by_distance(results: QueryResults, distance_threshold: float = -1) -> QueryResults: """Filters results based on a distance threshold. Args: @@ -795,3 +743,32 @@ def filter_results_by_distance( results = [[(key, value) for key, value in data if value < distance_threshold] for data in results] return results + + +def _chroma_get_results_to_list_documents(data_dict: GetResult) -> List[Document]: + """Converts a dictionary with list values to a list of Document. + + Args: + data_dict: A dictionary where keys map to lists or None. + + Returns: + List[Document] | The list of Document. 
+ """ + results: List[Document] = [] + + num_items = len(data_dict["ids"]) + ids = data_dict["ids"] + documents = data_dict.get("documents") or [None] * num_items + metadatas = data_dict.get("metadatas") or [None] * num_items + embeddings = data_dict.get("embeddings") or [None] * num_items + + for i in range(num_items): + results.append( + Document( + id=ids[i], + content=documents[i], + metadata=metadatas[i], + embedding=cast(Vector, embeddings[i]), + ) + ) + return results diff --git a/python/packages/autogen-ext/tests/storage/test_chroma_db.py b/python/packages/autogen-ext/tests/storage/test_chroma_db.py index a47e6a7edbb9..5cb150bdbbab 100644 --- a/python/packages/autogen-ext/tests/storage/test_chroma_db.py +++ b/python/packages/autogen-ext/tests/storage/test_chroma_db.py @@ -63,18 +63,3 @@ def test_chromadb(): assert [r.id for r in res] == ["2"] # "1" has been deleted res = db.get_docs_by_ids(collection_name=collection_name) assert [r.id for r in res] == ["2", "3"] - - # test _chroma_get_results_to_list_documents - data_dict = { - "key1s": [1, 2, 3], - "key2s": ["a", "b", "c"], - "key3s": None, - "key4s": ["x", "y", "z"], - } - - results = [ - {"key1": 1, "key2": "a", "key4": "x"}, - {"key1": 2, "key2": "b", "key4": "y"}, - {"key1": 3, "key2": "c", "key4": "z"}, - ] - assert db._chroma_get_results_to_list_documents(data_dict) == results # type: ignore From 1d59e51616a7518ab42a652077651dd862b9a913 Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Thu, 24 Oct 2024 16:51:47 +1000 Subject: [PATCH 06/12] update lock file --- python/uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/uv.lock b/python/uv.lock index 02bba38dfa1a..47c7981ce745 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -505,7 +505,7 @@ dev = [ [[package]] name = "autogen-ext" -version = "0.4.0.dev1" +version = "0.4.0.dev2" source = { editable = "packages/autogen-ext" } dependencies = [ { name = "autogen-core" }, From 299a2eb866276240a46cd140ca91f8d91c2484cd 
Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Thu, 24 Oct 2024 17:20:04 +1000 Subject: [PATCH 07/12] mypy fixes --- .../src/autogen_ext/storage/_base.py | 4 +-- .../src/autogen_ext/storage/_chromadb.py | 32 ++++++++++--------- .../tests/storage/test_chroma_db.py | 18 +++++------ 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_base.py b/python/packages/autogen-ext/src/autogen_ext/storage/_base.py index 164c8e1320e6..d2a53e7bd5ac 100644 --- a/python/packages/autogen-ext/src/autogen_ext/storage/_base.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/_base.py @@ -58,9 +58,7 @@ class AsyncVectorDB(Protocol): active_collection: Any = None type: str = "" - embedding_function: Optional[Callable[[List[str]], List[List[float]]]] = ( - None # embeddings = embedding_function(sentences) - ) + embedding_function: Optional[Callable[..., Any]] = None # embeddings = embedding_function(sentences) async def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> Any: """ diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py b/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py index 46f2008f201d..803bff243bde 100644 --- a/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py @@ -1,6 +1,6 @@ import logging import os -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Tuple, Union, cast +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union, cast from autogen_core.application.logging import TRACE_LOGGER_NAME from chromadb import GetResult @@ -79,10 +79,10 @@ def __init__( ) from e self.IncludeEnum = IncludeEnum - self.embedding_function: "EmbeddingFunction[Embeddable]" = ( # type: ignore - SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") + 
self.embedding_function: "EmbeddingFunction[Any]" = ( # type: ignore + SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") # type: ignore if embedding_function is None - else cast("EmbeddingFunction[Embeddable]", embedding_function) + else cast("EmbeddingFunction[Any]", embedding_function) ) self.metadata = metadata self.type = "chroma" @@ -337,8 +337,8 @@ def get_docs_by_ids( collection = self.get_collection(collection_name) results = collection.get(ids=[str(id_) for id_ in ids] if ids else None, include=include_enums) - results = _chroma_get_results_to_list_documents(results) - return results + results_list = _chroma_get_results_to_list_documents(results) + return results_list class AsyncChromaVectorDB(AsyncVectorDB): @@ -666,7 +666,9 @@ async def get_docs_by_ids( return results_list -def _chroma_results_to_query_results(data_dict: ChromaQueryResult, special_key: str = "distances") -> QueryResults: +def _chroma_results_to_query_results( + data_dict: ChromaQueryResult, special_key: Literal["distances"] = "distances" +) -> QueryResults: """Converts a ChromaDB query result into a list of lists of (Document, float) tuples. 
Args: @@ -703,7 +705,7 @@ def _chroma_results_to_query_results(data_dict: ChromaQueryResult, special_key: return [] result: List[List[Tuple[Document, float]]] = [] - data_special_key: Any = data_dict[special_key] + data_special_key: Optional[List[List[float]]] = data_dict[special_key] if data_special_key is None: return result @@ -711,15 +713,15 @@ def _chroma_results_to_query_results(data_dict: ChromaQueryResult, special_key: for i in range(len(data_special_key)): sub_result: List[Tuple[Document, float]] = [] ids = data_dict["ids"][i] - documents = data_dict.get("documents") or [None] * len(ids) - metadatas = data_dict.get("metadatas") or [None] * len(ids) + documents = data_dict.get("documents") or [None] * len(ids) # type: ignore + metadatas = data_dict.get("metadatas") or [None] * len(ids) # type: ignore embeddings = data_dict.get("embeddings") or [None] * len(ids) for j in range(len(data_special_key[i])): document = Document( id=ids[j], - content=cast(str, documents[j]), - metadata=cast(Metadata, metadatas[j]), - embedding=cast(Vector, embeddings[j]), + content=cast(Optional[str], documents[j]), + metadata=cast(Optional[Metadata], metadatas[j]), + embedding=cast(Optional[Vector], embeddings[j]), ) value = data_special_key[i][j] sub_result.append((document, value)) @@ -758,8 +760,8 @@ def _chroma_get_results_to_list_documents(data_dict: GetResult) -> List[Document num_items = len(data_dict["ids"]) ids = data_dict["ids"] - documents = data_dict.get("documents") or [None] * num_items - metadatas = data_dict.get("metadatas") or [None] * num_items + documents = data_dict.get("documents") or [None] * num_items # type: ignore + metadatas = data_dict.get("metadatas") or [None] * num_items # type: ignore embeddings = data_dict.get("embeddings") or [None] * num_items for i in range(num_items): diff --git a/python/packages/autogen-ext/tests/storage/test_chroma_db.py b/python/packages/autogen-ext/tests/storage/test_chroma_db.py index 5cb150bdbbab..9a16b25a68de 100644 --- 
a/python/packages/autogen-ext/tests/storage/test_chroma_db.py +++ b/python/packages/autogen-ext/tests/storage/test_chroma_db.py @@ -5,7 +5,7 @@ # @pytest.mark.skipif(skip, reason="dependency is not installed") -def test_chromadb(): +def test_chromadb() -> None: # test create collection db = ChromaVectorDB(path=".db") collection_name = "test_collection" @@ -53,13 +53,13 @@ def test_chromadb(): # test_retrieve_docs queries = ["doc2", "doc3"] collection_name = "test_collection" - res = db.retrieve_docs(queries, collection_name) - assert [[r[0].id for r in rr] for rr in res] == [["2", "3"], ["3", "2"]] - res = db.retrieve_docs(queries, collection_name, distance_threshold=0.1) - assert [[r[0].id for r in rr] for rr in res] == [["2"], ["3"]] + res = db.retrieve_docs(queries, collection_name) # type: ignore + assert [[r[0].id for r in rr] for rr in res] == [["2", "3"], ["3", "2"]] # type: ignore + res = db.retrieve_docs(queries, collection_name, distance_threshold=0.1) # type: ignore + assert [[r[0].id for r in rr] for rr in res] == [["2"], ["3"]] # type: ignore # test_get_docs_by_ids - res = db.get_docs_by_ids(["1", "2"], collection_name) - assert [r.id for r in res] == ["2"] # "1" has been deleted - res = db.get_docs_by_ids(collection_name=collection_name) - assert [r.id for r in res] == ["2", "3"] + res = db.get_docs_by_ids(["1", "2"], collection_name) # type: ignore + assert [r.id for r in res] == ["2"] # type: ignore + res = db.get_docs_by_ids(collection_name=collection_name) # type: ignore + assert [r.id for r in res] == ["2", "3"] # type: ignore From 2354a49a805aa740e0c44f75c489879f8ed83312 Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Fri, 25 Oct 2024 12:41:55 +1000 Subject: [PATCH 08/12] update tests --- .../src/autogen_ext/storage/__init__.py | 4 +- .../src/autogen_ext/storage/_chromadb.py | 37 ++-- .../tests/storage/test_chroma_db.py | 182 +++++++++++++++--- 3 files changed, 174 insertions(+), 49 deletions(-) diff --git 
a/python/packages/autogen-ext/src/autogen_ext/storage/__init__.py b/python/packages/autogen-ext/src/autogen_ext/storage/__init__.py index 523de3f21c4e..e9c14adfb670 100644 --- a/python/packages/autogen-ext/src/autogen_ext/storage/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/__init__.py @@ -1,4 +1,4 @@ -from ._chromadb import ChromaVectorDB +from ._chromadb import AsyncChromaVectorDB, ChromaVectorDB from ._factory import VectorDBFactory -__all__ = ["ChromaVectorDB", "VectorDBFactory"] +__all__ = ["ChromaVectorDB", "AsyncChromaVectorDB", "VectorDBFactory"] diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py b/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py index 803bff243bde..a808828a006d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py @@ -103,12 +103,11 @@ def __init__( def create_collection( self, collection_name: str, overwrite: bool = False, get_or_create: bool = True ) -> "Collection": - """ - Create a collection in the vector database. + """Create a collection in the vector database. + Case 1. if the collection does not exist, create the collection. Case 2. the collection exists, if overwrite is True, it will overwrite the collection. - Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection, - otherwise it raises a ValueError. + Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection, otherwise it raises a ValueError. Args: collection_name: str | The name of the collection. @@ -147,8 +146,7 @@ def create_collection( raise ValueError(f"Collection {collection_name} already exists.") def get_collection(self, collection_name: Optional[str] = None) -> "Collection": - """ - Get the collection from the vector database. + """Get the collection from the vector database. 
Args: collection_name: Optional[str] | The name of the collection. Default is None. @@ -429,12 +427,11 @@ def __init__( self.active_collection: Optional[Any] = None async def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> Any: - """ - Create a collection in the vector database. + """Create a collection in the vector database. + Case 1. if the collection does not exist, create the collection. Case 2. the collection exists, if overwrite is True, it will overwrite the collection. - Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection, - otherwise it raises a ValueError. + Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection, otherwise it raises a ValueError. Args: collection_name: str | The name of the collection. @@ -712,16 +709,20 @@ def _chroma_results_to_query_results( for i in range(len(data_special_key)): sub_result: List[Tuple[Document, float]] = [] - ids = data_dict["ids"][i] - documents = data_dict.get("documents") or [None] * len(ids) # type: ignore - metadatas = data_dict.get("metadatas") or [None] * len(ids) # type: ignore - embeddings = data_dict.get("embeddings") or [None] * len(ids) + ids_i = data_dict["ids"][i] + documents_list = data_dict.get("documents") + documents_i = documents_list[i] if documents_list else [None] * len(ids_i) # type: ignore + metadatas_list = data_dict.get("metadatas") + metadatas_i = metadatas_list[i] if metadatas_list else [None] * len(ids_i) # type: ignore + embeddings_list = data_dict.get("embeddings") + embeddings_i = embeddings_list[i] if embeddings_list else [None] * len(ids_i) + for j in range(len(data_special_key[i])): document = Document( - id=ids[j], - content=cast(Optional[str], documents[j]), - metadata=cast(Optional[Metadata], metadatas[j]), - embedding=cast(Optional[Vector], embeddings[j]), + id=ids_i[j], + content=documents_i[j], + 
metadata=cast(Optional[Metadata], metadatas_i[j]), + embedding=cast(Optional[Vector], embeddings_i[j]), ) value = data_special_key[i][j] sub_result.append((document, value)) diff --git a/python/packages/autogen-ext/tests/storage/test_chroma_db.py b/python/packages/autogen-ext/tests/storage/test_chroma_db.py index 9a16b25a68de..ad894f172076 100644 --- a/python/packages/autogen-ext/tests/storage/test_chroma_db.py +++ b/python/packages/autogen-ext/tests/storage/test_chroma_db.py @@ -1,65 +1,189 @@ +# packages/autogen-ext/tests/storage/test_chroma_db.py +import asyncio +from typing import AsyncGenerator, Generator + import pytest -from autogen_ext.storage import ChromaVectorDB +import pytest_asyncio +from autogen_ext.storage import AsyncChromaVectorDB, ChromaVectorDB from autogen_ext.storage._base import Document from chromadb.errors import ChromaError -# @pytest.mark.skipif(skip, reason="dependency is not installed") -def test_chromadb() -> None: - # test create collection - db = ChromaVectorDB(path=".db") - collection_name = "test_collection" +# Fixture for the synchronous database instance with module-level scope +@pytest.fixture(scope="module") +def db() -> Generator[ChromaVectorDB, None, None]: + db_instance = ChromaVectorDB(path=".db") + yield db_instance + # Teardown code if necessary + db_instance.client.reset() # Or any other cleanup if needed + + +# Fixture for the collection name +@pytest.fixture(scope="module") +def collection_name() -> str: + return "test_collection" + + +def test_create_collection(db: ChromaVectorDB, collection_name: str) -> None: collection = db.create_collection(collection_name, overwrite=True, get_or_create=True) assert collection.name == collection_name - # test_delete_collection + +def test_delete_collection(db: ChromaVectorDB, collection_name: str) -> None: db.delete_collection(collection_name) - pytest.raises((ValueError, ChromaError), db.get_collection, collection_name) + with pytest.raises((ValueError, ChromaError)): + 
db.get_collection(collection_name) - # test more create collection + +def test_more_create_collection(db: ChromaVectorDB, collection_name: str) -> None: collection = db.create_collection(collection_name, overwrite=False, get_or_create=False) assert collection.name == collection_name - pytest.raises( - (ValueError, ChromaError), db.create_collection, collection_name, overwrite=False, get_or_create=False - ) + with pytest.raises((ValueError, ChromaError)): + db.create_collection(collection_name, overwrite=False, get_or_create=False) collection = db.create_collection(collection_name, overwrite=True, get_or_create=False) assert collection.name == collection_name collection = db.create_collection(collection_name, overwrite=False, get_or_create=True) assert collection.name == collection_name - # test_get_collection + +def test_get_collection(db: ChromaVectorDB, collection_name: str) -> None: collection = db.get_collection(collection_name) assert collection.name == collection_name - # test_insert_docs + +def test_insert_docs(db: ChromaVectorDB, collection_name: str) -> None: docs = [Document(content="doc1", id="1"), Document(content="doc2", id="2"), Document(content="doc3", id="3")] db.insert_docs(docs, collection_name, upsert=False) res = db.get_collection(collection_name).get(["1", "2"]) assert res["documents"] == ["doc1", "doc2"] - # test_update_docs + +def test_update_docs(db: ChromaVectorDB, collection_name: str) -> None: docs = [Document(content="doc11", id="1"), Document(content="doc2", id="2"), Document(content="doc3", id="3")] db.update_docs(docs, collection_name) res = db.get_collection(collection_name).get(["1", "2"]) assert res["documents"] == ["doc11", "doc2"] - # test_delete_docs + +def test_delete_docs(db: ChromaVectorDB, collection_name: str) -> None: ids = ["1"] - collection_name = "test_collection" db.delete_docs(ids, collection_name) res = db.get_collection(collection_name).get(ids) assert res["documents"] == [] - # test_retrieve_docs + +def 
test_retrieve_docs(db: ChromaVectorDB, collection_name: str) -> None: + queries = ["doc2", "doc3"] + res = db.retrieve_docs(queries, collection_name) + assert [[r[0].id for r in rr] for rr in res] == [["2", "3"], ["3", "2"]] + res = db.retrieve_docs(queries, collection_name, distance_threshold=0.1) + assert [[r[0].id for r in rr] for rr in res] == [["2"], ["3"]] + + +def test_get_docs_by_ids(db: ChromaVectorDB, collection_name: str) -> None: + res = db.get_docs_by_ids(["1", "2"], collection_name) + assert [r.id for r in res] == ["2"] + res = db.get_docs_by_ids(collection_name=collection_name) + assert [r.id for r in res] == ["2", "3"] + + +# Fixture for the event loop (required by pytest-asyncio) +@pytest.fixture(scope="module") +def event_loop() -> Generator[asyncio.AbstractEventLoop, None, None]: + loop = asyncio.new_event_loop() + yield loop + loop.close() + + +# Fixture for the asynchronous database instance with module-level scope +@pytest_asyncio.fixture(scope="module") # type: ignore +async def async_db() -> AsyncGenerator[AsyncChromaVectorDB, None]: + # Provide an embedding function compatible with async context + async def embedding_function(texts: list[str]) -> list[list[float]]: + # Dummy embedding function; replace with actual implementation + return [[0.0] * 384 for _ in texts] + + db_instance = AsyncChromaVectorDB(embedding_function=embedding_function) # type: ignore + yield db_instance + await db_instance.client.reset() # Or any other cleanup if needed + + +# Fixture for the asynchronous collection name +@pytest.fixture(scope="module") +def async_collection_name() -> str: + return "test_async_collection" + + +@pytest.mark.asyncio +async def test_async_create_collection(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: + collection = await async_db.create_collection(async_collection_name, overwrite=True, get_or_create=True) + assert collection.name == async_collection_name + + +@pytest.mark.asyncio +async def 
test_async_delete_collection(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: + await async_db.delete_collection(async_collection_name) + with pytest.raises((ValueError, ChromaError)): + await async_db.get_collection(async_collection_name) + + +@pytest.mark.asyncio +async def test_async_more_create_collection(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: + collection = await async_db.create_collection(async_collection_name, overwrite=False, get_or_create=False) + assert collection.name == async_collection_name + with pytest.raises((ValueError, ChromaError)): + await async_db.create_collection(async_collection_name, overwrite=False, get_or_create=False) + collection = await async_db.create_collection(async_collection_name, overwrite=True, get_or_create=False) + assert collection.name == async_collection_name + collection = await async_db.create_collection(async_collection_name, overwrite=False, get_or_create=True) + assert collection.name == async_collection_name + + +@pytest.mark.asyncio +async def test_async_get_collection(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: + collection = await async_db.get_collection(async_collection_name) + assert collection.name == async_collection_name + + +@pytest.mark.asyncio +async def test_async_insert_docs(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: + docs = [Document(content="doc1", id="1"), Document(content="doc2", id="2"), Document(content="doc3", id="3")] + await async_db.insert_docs(docs, async_collection_name, upsert=False) + collection = await async_db.get_collection(async_collection_name) + res = await collection.get(["1", "2"]) + assert res["documents"] == ["doc1", "doc2"] + + +@pytest.mark.asyncio +async def test_async_update_docs(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: + docs = [Document(content="doc11", id="1"), Document(content="doc2", id="2"), Document(content="doc3", id="3")] + await 
async_db.update_docs(docs, async_collection_name) + collection = await async_db.get_collection(async_collection_name) + res = await collection.get(["1", "2"]) + assert res["documents"] == ["doc11", "doc2"] + + +@pytest.mark.asyncio +async def test_async_delete_docs(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: + ids = ["1"] + await async_db.delete_docs(ids, async_collection_name) + collection = await async_db.get_collection(async_collection_name) + res = await collection.get(ids) + assert res["documents"] == [] + + +@pytest.mark.asyncio +async def test_async_retrieve_docs(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: queries = ["doc2", "doc3"] - collection_name = "test_collection" - res = db.retrieve_docs(queries, collection_name) # type: ignore - assert [[r[0].id for r in rr] for rr in res] == [["2", "3"], ["3", "2"]] # type: ignore - res = db.retrieve_docs(queries, collection_name, distance_threshold=0.1) # type: ignore - assert [[r[0].id for r in rr] for rr in res] == [["2"], ["3"]] # type: ignore - - # test_get_docs_by_ids - res = db.get_docs_by_ids(["1", "2"], collection_name) # type: ignore - assert [r.id for r in res] == ["2"] # type: ignore - res = db.get_docs_by_ids(collection_name=collection_name) # type: ignore - assert [r.id for r in res] == ["2", "3"] # type: ignore + res = await async_db.retrieve_docs(queries, async_collection_name) + assert [[r[0].id for r in rr] for rr in res] == [["2", "3"], ["3", "2"]] + res = await async_db.retrieve_docs(queries, async_collection_name, distance_threshold=0.1) + assert [[r[0].id for r in rr] for rr in res] == [["2"], ["3"]] + + +@pytest.mark.asyncio +async def test_async_get_docs_by_ids(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: + res = await async_db.get_docs_by_ids(["1", "2"], async_collection_name) + assert [r.id for r in res] == ["2"] + res = await async_db.get_docs_by_ids(collection_name=async_collection_name) + assert [r.id for r in res] == 
["2", "3"] From ab7fd07bc2d0901e74076db6e67d4e3ab38b06e6 Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Fri, 25 Oct 2024 15:07:51 +1000 Subject: [PATCH 09/12] fix tests --- .../tests/storage/test_chroma_db.py | 116 +----------------- 1 file changed, 6 insertions(+), 110 deletions(-) diff --git a/python/packages/autogen-ext/tests/storage/test_chroma_db.py b/python/packages/autogen-ext/tests/storage/test_chroma_db.py index ad894f172076..4664324f5c81 100644 --- a/python/packages/autogen-ext/tests/storage/test_chroma_db.py +++ b/python/packages/autogen-ext/tests/storage/test_chroma_db.py @@ -1,21 +1,19 @@ -# packages/autogen-ext/tests/storage/test_chroma_db.py -import asyncio -from typing import AsyncGenerator, Generator +from typing import Generator import pytest -import pytest_asyncio -from autogen_ext.storage import AsyncChromaVectorDB, ChromaVectorDB +from autogen_ext.storage import ChromaVectorDB from autogen_ext.storage._base import Document +from chromadb.config import Settings from chromadb.errors import ChromaError # Fixture for the synchronous database instance with module-level scope @pytest.fixture(scope="module") def db() -> Generator[ChromaVectorDB, None, None]: - db_instance = ChromaVectorDB(path=".db") + db_instance = ChromaVectorDB(path=".db", settings=Settings(allow_reset=True)) yield db_instance - # Teardown code if necessary - db_instance.client.reset() # Or any other cleanup if needed + # Teardown code + db_instance.client.reset() # Fixture for the collection name @@ -85,105 +83,3 @@ def test_get_docs_by_ids(db: ChromaVectorDB, collection_name: str) -> None: assert [r.id for r in res] == ["2"] res = db.get_docs_by_ids(collection_name=collection_name) assert [r.id for r in res] == ["2", "3"] - - -# Fixture for the event loop (required by pytest-asyncio) -@pytest.fixture(scope="module") -def event_loop() -> Generator[asyncio.AbstractEventLoop, None, None]: - loop = asyncio.new_event_loop() - yield loop - loop.close() - - -# Fixture for the 
asynchronous database instance with module-level scope -@pytest_asyncio.fixture(scope="module") # type: ignore -async def async_db() -> AsyncGenerator[AsyncChromaVectorDB, None]: - # Provide an embedding function compatible with async context - async def embedding_function(texts: list[str]) -> list[list[float]]: - # Dummy embedding function; replace with actual implementation - return [[0.0] * 384 for _ in texts] - - db_instance = AsyncChromaVectorDB(embedding_function=embedding_function) # type: ignore - yield db_instance - await db_instance.client.reset() # Or any other cleanup if needed - - -# Fixture for the asynchronous collection name -@pytest.fixture(scope="module") -def async_collection_name() -> str: - return "test_async_collection" - - -@pytest.mark.asyncio -async def test_async_create_collection(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: - collection = await async_db.create_collection(async_collection_name, overwrite=True, get_or_create=True) - assert collection.name == async_collection_name - - -@pytest.mark.asyncio -async def test_async_delete_collection(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: - await async_db.delete_collection(async_collection_name) - with pytest.raises((ValueError, ChromaError)): - await async_db.get_collection(async_collection_name) - - -@pytest.mark.asyncio -async def test_async_more_create_collection(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: - collection = await async_db.create_collection(async_collection_name, overwrite=False, get_or_create=False) - assert collection.name == async_collection_name - with pytest.raises((ValueError, ChromaError)): - await async_db.create_collection(async_collection_name, overwrite=False, get_or_create=False) - collection = await async_db.create_collection(async_collection_name, overwrite=True, get_or_create=False) - assert collection.name == async_collection_name - collection = await 
async_db.create_collection(async_collection_name, overwrite=False, get_or_create=True) - assert collection.name == async_collection_name - - -@pytest.mark.asyncio -async def test_async_get_collection(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: - collection = await async_db.get_collection(async_collection_name) - assert collection.name == async_collection_name - - -@pytest.mark.asyncio -async def test_async_insert_docs(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: - docs = [Document(content="doc1", id="1"), Document(content="doc2", id="2"), Document(content="doc3", id="3")] - await async_db.insert_docs(docs, async_collection_name, upsert=False) - collection = await async_db.get_collection(async_collection_name) - res = await collection.get(["1", "2"]) - assert res["documents"] == ["doc1", "doc2"] - - -@pytest.mark.asyncio -async def test_async_update_docs(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: - docs = [Document(content="doc11", id="1"), Document(content="doc2", id="2"), Document(content="doc3", id="3")] - await async_db.update_docs(docs, async_collection_name) - collection = await async_db.get_collection(async_collection_name) - res = await collection.get(["1", "2"]) - assert res["documents"] == ["doc11", "doc2"] - - -@pytest.mark.asyncio -async def test_async_delete_docs(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: - ids = ["1"] - await async_db.delete_docs(ids, async_collection_name) - collection = await async_db.get_collection(async_collection_name) - res = await collection.get(ids) - assert res["documents"] == [] - - -@pytest.mark.asyncio -async def test_async_retrieve_docs(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: - queries = ["doc2", "doc3"] - res = await async_db.retrieve_docs(queries, async_collection_name) - assert [[r[0].id for r in rr] for rr in res] == [["2", "3"], ["3", "2"]] - res = await async_db.retrieve_docs(queries, 
async_collection_name, distance_threshold=0.1) - assert [[r[0].id for r in rr] for rr in res] == [["2"], ["3"]] - - -@pytest.mark.asyncio -async def test_async_get_docs_by_ids(async_db: AsyncChromaVectorDB, async_collection_name: str) -> None: - res = await async_db.get_docs_by_ids(["1", "2"], async_collection_name) - assert [r.id for r in res] == ["2"] - res = await async_db.get_docs_by_ids(collection_name=async_collection_name) - assert [r.id for r in res] == ["2", "3"] From 5745849826c589697974519eed07151ecb108f88 Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Fri, 25 Oct 2024 17:23:45 +1000 Subject: [PATCH 10/12] add parallel test setup --- .../tests/storage/test_chroma_db.py | 83 +++++++++++++++---- 1 file changed, 68 insertions(+), 15 deletions(-) diff --git a/python/packages/autogen-ext/tests/storage/test_chroma_db.py b/python/packages/autogen-ext/tests/storage/test_chroma_db.py index 4664324f5c81..a6d45ea531ca 100644 --- a/python/packages/autogen-ext/tests/storage/test_chroma_db.py +++ b/python/packages/autogen-ext/tests/storage/test_chroma_db.py @@ -16,10 +16,18 @@ def db() -> Generator[ChromaVectorDB, None, None]: db_instance.client.reset() -# Fixture for the collection name -@pytest.fixture(scope="module") -def collection_name() -> str: - return "test_collection" +# Fixture for unique collection names per test +@pytest.fixture(scope="function") +def collection_name(request) -> str: + return f"test_collection_{request.node.name}" + + +# Fixture to create and delete the collection around each test +@pytest.fixture(scope="function") +def collection(db: ChromaVectorDB, collection_name: str): + collection = db.create_collection(collection_name, overwrite=True, get_or_create=True) + yield collection + db.delete_collection(collection_name) def test_create_collection(db: ChromaVectorDB, collection_name: str) -> None: @@ -28,12 +36,20 @@ def test_create_collection(db: ChromaVectorDB, collection_name: str) -> None: def test_delete_collection(db: 
ChromaVectorDB, collection_name: str) -> None: + # Create the collection first + db.create_collection(collection_name, overwrite=True, get_or_create=True) db.delete_collection(collection_name) with pytest.raises((ValueError, ChromaError)): db.get_collection(collection_name) def test_more_create_collection(db: ChromaVectorDB, collection_name: str) -> None: + # Ensure the collection is deleted at the start + try: + db.delete_collection(collection_name) + except (ValueError, ChromaError): + pass + collection = db.create_collection(collection_name, overwrite=False, get_or_create=False) assert collection.name == collection_name with pytest.raises((ValueError, ChromaError)): @@ -44,33 +60,63 @@ def test_more_create_collection(db: ChromaVectorDB, collection_name: str) -> Non assert collection.name == collection_name -def test_get_collection(db: ChromaVectorDB, collection_name: str) -> None: - collection = db.get_collection(collection_name) - assert collection.name == collection_name +def test_get_collection(db: ChromaVectorDB, collection_name: str, collection) -> None: + retrieved_collection = db.get_collection(collection_name) + assert retrieved_collection.name == collection_name -def test_insert_docs(db: ChromaVectorDB, collection_name: str) -> None: - docs = [Document(content="doc1", id="1"), Document(content="doc2", id="2"), Document(content="doc3", id="3")] +def test_insert_docs(db: ChromaVectorDB, collection_name: str, collection) -> None: + docs = [ + Document(content="doc1", id="1"), + Document(content="doc2", id="2"), + Document(content="doc3", id="3"), + ] db.insert_docs(docs, collection_name, upsert=False) res = db.get_collection(collection_name).get(["1", "2"]) assert res["documents"] == ["doc1", "doc2"] -def test_update_docs(db: ChromaVectorDB, collection_name: str) -> None: - docs = [Document(content="doc11", id="1"), Document(content="doc2", id="2"), Document(content="doc3", id="3")] - db.update_docs(docs, collection_name) +def test_update_docs(db: 
ChromaVectorDB, collection_name: str, collection) -> None: + # Insert initial docs + initial_docs = [ + Document(content="doc1", id="1"), + Document(content="doc2", id="2"), + ] + db.insert_docs(initial_docs, collection_name, upsert=False) + + # Now update + updated_docs = [ + Document(content="doc11", id="1"), + Document(content="doc2", id="2"), + Document(content="doc3", id="3"), + ] + db.update_docs(updated_docs, collection_name) res = db.get_collection(collection_name).get(["1", "2"]) assert res["documents"] == ["doc11", "doc2"] -def test_delete_docs(db: ChromaVectorDB, collection_name: str) -> None: +def test_delete_docs(db: ChromaVectorDB, collection_name: str, collection) -> None: + # Insert initial docs + initial_docs = [ + Document(content="doc1", id="1"), + Document(content="doc2", id="2"), + ] + db.insert_docs(initial_docs, collection_name, upsert=False) + ids = ["1"] db.delete_docs(ids, collection_name) res = db.get_collection(collection_name).get(ids) assert res["documents"] == [] -def test_retrieve_docs(db: ChromaVectorDB, collection_name: str) -> None: +def test_retrieve_docs(db: ChromaVectorDB, collection_name: str, collection) -> None: + # Insert initial docs + initial_docs = [ + Document(content="doc2", id="2"), + Document(content="doc3", id="3"), + ] + db.insert_docs(initial_docs, collection_name, upsert=False) + queries = ["doc2", "doc3"] res = db.retrieve_docs(queries, collection_name) assert [[r[0].id for r in rr] for rr in res] == [["2", "3"], ["3", "2"]] @@ -78,7 +124,14 @@ def test_retrieve_docs(db: ChromaVectorDB, collection_name: str) -> None: assert [[r[0].id for r in rr] for rr in res] == [["2"], ["3"]] -def test_get_docs_by_ids(db: ChromaVectorDB, collection_name: str) -> None: +def test_get_docs_by_ids(db: ChromaVectorDB, collection_name: str, collection) -> None: + # Insert initial docs + initial_docs = [ + Document(content="doc2", id="2"), + Document(content="doc3", id="3"), + ] + db.insert_docs(initial_docs, collection_name, 
upsert=False) + res = db.get_docs_by_ids(["1", "2"], collection_name) assert [r.id for r in res] == ["2"] res = db.get_docs_by_ids(collection_name=collection_name) From 4391961400a440d35d5f4071afc23b8031ccbb3b Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Fri, 25 Oct 2024 17:38:42 +1000 Subject: [PATCH 11/12] test fix --- .../tests/storage/test_chroma_db.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/python/packages/autogen-ext/tests/storage/test_chroma_db.py b/python/packages/autogen-ext/tests/storage/test_chroma_db.py index a6d45ea531ca..9a11cfa88a75 100644 --- a/python/packages/autogen-ext/tests/storage/test_chroma_db.py +++ b/python/packages/autogen-ext/tests/storage/test_chroma_db.py @@ -1,16 +1,19 @@ +from pathlib import Path from typing import Generator import pytest from autogen_ext.storage import ChromaVectorDB from autogen_ext.storage._base import Document +from chromadb import Collection from chromadb.config import Settings from chromadb.errors import ChromaError -# Fixture for the synchronous database instance with module-level scope -@pytest.fixture(scope="module") -def db() -> Generator[ChromaVectorDB, None, None]: - db_instance = ChromaVectorDB(path=".db", settings=Settings(allow_reset=True)) +# Fixture for the synchronous database instance with function-level scope +@pytest.fixture(scope="function") +def db(tmp_path: Path) -> Generator[ChromaVectorDB, None, None]: + db_path = tmp_path / "test_db" + db_instance = ChromaVectorDB(path=str(db_path), settings=Settings(allow_reset=True)) yield db_instance # Teardown code db_instance.client.reset() @@ -18,13 +21,13 @@ def db() -> Generator[ChromaVectorDB, None, None]: # Fixture for unique collection names per test @pytest.fixture(scope="function") -def collection_name(request) -> str: - return f"test_collection_{request.node.name}" +def collection_name(request: pytest.FixtureRequest) -> str: + return f"test_collection_{request.node.name}" # type: ignore 
# Fixture to create and delete the collection around each test @pytest.fixture(scope="function") -def collection(db: ChromaVectorDB, collection_name: str): +def collection(db: ChromaVectorDB, collection_name: str) -> Generator[Collection, None, None]: collection = db.create_collection(collection_name, overwrite=True, get_or_create=True) yield collection db.delete_collection(collection_name) @@ -60,12 +63,12 @@ def test_more_create_collection(db: ChromaVectorDB, collection_name: str) -> Non assert collection.name == collection_name -def test_get_collection(db: ChromaVectorDB, collection_name: str, collection) -> None: +def test_get_collection(db: ChromaVectorDB, collection_name: str, collection: Collection) -> None: retrieved_collection = db.get_collection(collection_name) assert retrieved_collection.name == collection_name -def test_insert_docs(db: ChromaVectorDB, collection_name: str, collection) -> None: +def test_insert_docs(db: ChromaVectorDB, collection_name: str, collection: Collection) -> None: docs = [ Document(content="doc1", id="1"), Document(content="doc2", id="2"), @@ -76,7 +79,7 @@ def test_insert_docs(db: ChromaVectorDB, collection_name: str, collection) -> No assert res["documents"] == ["doc1", "doc2"] -def test_update_docs(db: ChromaVectorDB, collection_name: str, collection) -> None: +def test_update_docs(db: ChromaVectorDB, collection_name: str, collection: Collection) -> None: # Insert initial docs initial_docs = [ Document(content="doc1", id="1"), @@ -95,7 +98,7 @@ def test_update_docs(db: ChromaVectorDB, collection_name: str, collection) -> No assert res["documents"] == ["doc11", "doc2"] -def test_delete_docs(db: ChromaVectorDB, collection_name: str, collection) -> None: +def test_delete_docs(db: ChromaVectorDB, collection_name: str, collection: Collection) -> None: # Insert initial docs initial_docs = [ Document(content="doc1", id="1"), @@ -109,7 +112,7 @@ def test_delete_docs(db: ChromaVectorDB, collection_name: str, collection) -> No assert 
res["documents"] == [] -def test_retrieve_docs(db: ChromaVectorDB, collection_name: str, collection) -> None: +def test_retrieve_docs(db: ChromaVectorDB, collection_name: str, collection: Collection) -> None: # Insert initial docs initial_docs = [ Document(content="doc2", id="2"), @@ -124,7 +127,7 @@ def test_retrieve_docs(db: ChromaVectorDB, collection_name: str, collection) -> assert [[r[0].id for r in rr] for rr in res] == [["2"], ["3"]] -def test_get_docs_by_ids(db: ChromaVectorDB, collection_name: str, collection) -> None: +def test_get_docs_by_ids(db: ChromaVectorDB, collection_name: str, collection: Collection) -> None: # Insert initial docs initial_docs = [ Document(content="doc2", id="2"), From 5116a52ba72bdd39a671a31c4e18834d275aa005 Mon Sep 17 00:00:00 2001 From: Leonardo Pinheiro Date: Tue, 29 Oct 2024 19:06:29 +1000 Subject: [PATCH 12/12] add create collection kwargs --- .../src/autogen_ext/storage/_base.py | 13 +++++++-- .../src/autogen_ext/storage/_chromadb.py | 27 ++++++++++++------- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_base.py b/python/packages/autogen-ext/src/autogen_ext/storage/_base.py index d2a53e7bd5ac..dff99cf04ec4 100644 --- a/python/packages/autogen-ext/src/autogen_ext/storage/_base.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/_base.py @@ -60,7 +60,13 @@ class AsyncVectorDB(Protocol): type: str = "" embedding_function: Optional[Callable[..., Any]] = None # embeddings = embedding_function(sentences) - async def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> Any: + async def create_collection( + self, + collection_name: str, + overwrite: bool = False, + get_or_create: bool = True, + **kwargs: Any, + ) -> Any: """ Create a collection in the vector database. Case 1. if the collection does not exist, create the collection. 
@@ -72,6 +78,7 @@ async def create_collection(self, collection_name: str, overwrite: bool = False, collection_name: str | The name of the collection. overwrite: bool | Whether to overwrite the collection if it exists. Default is False. get_or_create: bool | Whether to get the collection if it exists. Default is True. + kwargs: Dict[str, Any] | Additional keyword arguments for collection creation (e.g. schema). Returns: Any | The collection object. @@ -228,7 +235,9 @@ class VectorDB(Protocol): None # embeddings = embedding_function(sentences) ) - def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> Any: + def create_collection( + self, collection_name: str, overwrite: bool = False, get_or_create: bool = True, **kwargs: Any + ) -> Any: """ Create a collection in the vector database. Case 1. if the collection does not exist, create the collection. diff --git a/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py b/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py index a808828a006d..bd7c17085dbb 100644 --- a/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py +++ b/python/packages/autogen-ext/src/autogen_ext/storage/_chromadb.py @@ -101,7 +101,7 @@ def __init__( self.active_collection: Optional["Collection"] = None def create_collection( - self, collection_name: str, overwrite: bool = False, get_or_create: bool = True + self, collection_name: str, overwrite: bool = False, get_or_create: bool = True, **kwargs: Any ) -> "Collection": """Create a collection in the vector database. 
@@ -130,15 +130,17 @@ def create_collection( if collection is None: return self.client.create_collection( name=collection_name, - embedding_function=self.embedding_function, - metadata=self.metadata, + embedding_function=kwargs.pop("embedding_function", self.embedding_function), + metadata=kwargs.pop("metadata", self.metadata), + data_loader=kwargs.pop("data_loader", None), ) elif overwrite: self.client.delete_collection(name=collection_name) return self.client.create_collection( name=collection_name, - embedding_function=self.embedding_function, - metadata=self.metadata, + embedding_function=kwargs.pop("embedding_function", self.embedding_function), + metadata=kwargs.pop("metadata", self.metadata), + data_loader=kwargs.pop("data_loader", None), ) elif get_or_create: return collection @@ -426,7 +428,9 @@ def __init__( ) self.active_collection: Optional[Any] = None - async def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> Any: + async def create_collection( + self, collection_name: str, overwrite: bool = False, get_or_create: bool = True, **kwargs: Any + ) -> Any: """Create a collection in the vector database. Case 1. if the collection does not exist, create the collection. @@ -438,6 +442,7 @@ async def create_collection(self, collection_name: str, overwrite: bool = False, overwrite: bool | Whether to overwrite the collection if it exists. Default is False. get_or_create: bool | Whether to get the collection if it exists. Default is True. + Returns: Any | The collection object. 
""" @@ -453,15 +458,17 @@ async def create_collection(self, collection_name: str, overwrite: bool = False, if collection is None: return await self.client.create_collection( name=collection_name, - embedding_function=self.embedding_function, - metadata={}, + embedding_function=kwargs.pop("embedding_function", self.embedding_function), + metadata=kwargs.pop("metadata", {}), + data_loader=kwargs.pop("data_loader", None), ) elif overwrite: await self.client.delete_collection(name=collection_name) return await self.client.create_collection( name=collection_name, - embedding_function=self.embedding_function, - metadata={}, + embedding_function=kwargs.pop("embedding_function", self.embedding_function), + metadata=kwargs.pop("metadata", {}), + data_loader=kwargs.pop("data_loader", None), ) elif get_or_create: return collection