diff --git a/docs/extras/integrations/llms/ollama.ipynb b/docs/extras/integrations/llms/ollama.ipynb
index 8cfd71cbb3e12..55e77871c6b50 100644
--- a/docs/extras/integrations/llms/ollama.ipynb
+++ b/docs/extras/integrations/llms/ollama.ipynb
@@ -106,6 +106,25 @@
     "llm(\"Tell me about the history of AI\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Ollama supports embeddings via `OllamaEmbeddings`:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings import OllamaEmbeddings\n",
+    "oembed = OllamaEmbeddings(base_url=\"http://localhost:11434\", model=\"llama2\")\n",
+    "\n",
+    "oembed.embed_query(\"Llamas are social animals and live with others as a herd.\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -121,7 +140,7 @@
     "ollama run llama2:13b \n",
     "```\n",
     "\n",
-    "Let's also use local embeddings from `GPT4AllEmbeddings` and `Chroma`."
+    "Let's also use local embeddings from `OllamaEmbeddings` and `Chroma`."
    ]
   },
   {
@@ -163,9 +182,9 @@
    ],
    "source": [
     "from langchain.vectorstores import Chroma\n",
-    "from langchain.embeddings import GPT4AllEmbeddings\n",
+    "from langchain.embeddings import OllamaEmbeddings\n",
     "\n",
-    "vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())"
+    "vectorstore = Chroma.from_documents(documents=all_splits, embedding=OllamaEmbeddings())"
    ]
   },
   {
@@ -353,7 +372,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.11.5"
   }
  },
  "nbformat": 4,
diff --git a/docs/extras/integrations/text_embedding/ollama.ipynb b/docs/extras/integrations/text_embedding/ollama.ipynb
new file mode 100644
index 0000000000000..eee011fc73ede
--- /dev/null
+++ b/docs/extras/integrations/text_embedding/ollama.ipynb
@@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "278b6c63",
+   "metadata": {},
+   "source": [
+    "# Ollama\n",
+    "\n",
+    "Let's load the Ollama Embeddings class."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0be1af71",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings import OllamaEmbeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2c66e5da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embeddings = OllamaEmbeddings()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "01370375",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = \"This is a test document.\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a42e4035",
+   "metadata": {},
+   "source": [
+    "To generate embeddings, you can either query an individual text or a list of texts."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "91bc875d-829b-4c3d-8e6f-fc2dda30a3bd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.09996652603149414,\n",
+       " 0.015568195842206478,\n",
+       " 0.17670190334320068,\n",
+       " 0.16521021723747253,\n",
+       " 0.21193109452724457]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query_result = embeddings.embed_query(text)\n",
+    "query_result[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "a4b0d49e-0c73-44b6-aed5-5b426564e085",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.04242777079343796,\n",
+       " 0.016536075621843338,\n",
+       " 0.10052520781755447,\n",
+       " 0.18272875249385834,\n",
+       " 0.2079043835401535]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "doc_result = embeddings.embed_documents([text])\n",
+    "doc_result[0][:5]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bb61bbeb",
+   "metadata": {},
+   "source": [
+    "Let's load the Ollama Embeddings class with a smaller model (e.g., `llama2:7b`). Note: see the other supported models at [https://ollama.ai/library](https://ollama.ai/library)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "a56b70f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embeddings = OllamaEmbeddings(model=\"llama2:7b\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "14aefb64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = \"This is a test document.\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "3c39ed33",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_result = embeddings.embed_query(text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "2ee7ce9f-d506-4810-8897-e44334412714",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.09996627271175385,\n",
+       " 0.015567859634757042,\n",
+       " 0.17670205235481262,\n",
+       " 0.16521376371383667,\n",
+       " 0.21193283796310425]"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query_result[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "e3221db6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "doc_result = embeddings.embed_documents([text])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "a0865409-3a6d-468f-939f-abde17c7cac3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.042427532374858856,\n",
+       " 0.01653730869293213,\n",
+       " 0.10052604228258133,\n",
+       " 0.18272635340690613,\n",
+       " 0.20790338516235352]"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "doc_result[0][:5]"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py
index 32fdc9472720c..621b88a628ad1 100644
--- a/libs/langchain/langchain/embeddings/__init__.py
+++ b/libs/langchain/langchain/embeddings/__init__.py
@@ -49,6 +49,7 @@
 from langchain.embeddings.mosaicml import MosaicMLInstructorEmbeddings
 from langchain.embeddings.nlpcloud import NLPCloudEmbeddings
 from langchain.embeddings.octoai_embeddings import OctoAIEmbeddings
+from langchain.embeddings.ollama import OllamaEmbeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.embeddings.sagemaker_endpoint import SagemakerEndpointEmbeddings
 from langchain.embeddings.self_hosted import SelfHostedEmbeddings
@@ -106,6 +107,7 @@
     "AwaEmbeddings",
     "HuggingFaceBgeEmbeddings",
     "ErnieEmbeddings",
+    "OllamaEmbeddings",
     "QianfanEmbeddingsEndpoint",
 ]
diff --git a/libs/langchain/langchain/embeddings/ollama.py b/libs/langchain/langchain/embeddings/ollama.py
new file mode 100644
index 0000000000000..c610cabdd4ab3
--- /dev/null
+++ b/libs/langchain/langchain/embeddings/ollama.py
@@ -0,0 +1,205 @@
+from typing import Any, Dict, List, Mapping, Optional
+
+import requests
+
+from langchain.embeddings.base import Embeddings
+from langchain.pydantic_v1 import BaseModel, Extra
+
+
+class OllamaEmbeddings(BaseModel, Embeddings):
+    """Ollama locally runs large language models.
+
+    To use, follow the instructions at https://ollama.ai/.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.embeddings import OllamaEmbeddings
+            ollama_emb = OllamaEmbeddings(
+                model="llama2:7b",
+            )
+            r1 = ollama_emb.embed_documents(
+                [
+                    "Alpha is the first letter of Greek alphabet",
+                    "Beta is the second letter of Greek alphabet",
+                ]
+            )
+            r2 = ollama_emb.embed_query(
+                "What is the second letter of Greek alphabet"
+            )
+
+    """
+
+    base_url: str = "http://localhost:11434"
+    """Base URL the model is hosted under."""
+    model: str = "llama2"
+    """Model name to use."""
+
+    embed_instruction: str = "passage: "
+    """Instruction used to embed documents."""
+    query_instruction: str = "query: "
+    """Instruction used to embed the query."""
+
+    mirostat: Optional[int]
+    """Enable Mirostat sampling for controlling perplexity.
+    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+
+    mirostat_eta: Optional[float]
+    """Influences how quickly the algorithm responds to feedback
+    from the generated text. A lower learning rate will result in
+    slower adjustments, while a higher learning rate will make
+    the algorithm more responsive. (Default: 0.1)"""
+
+    mirostat_tau: Optional[float]
+    """Controls the balance between coherence and diversity
+    of the output. A lower value will result in more focused and
+    coherent text. (Default: 5.0)"""
+
+    num_ctx: Optional[int]
+    """Sets the size of the context window used to generate the
+    next token. (Default: 2048)"""
+
+    num_gpu: Optional[int]
+    """The number of GPUs to use. On macOS it defaults to 1 to
+    enable metal support, 0 to disable."""
+
+    num_thread: Optional[int]
+    """Sets the number of threads to use during computation.
+    By default, Ollama will detect this for optimal performance.
+    It is recommended to set this value to the number of physical
+    CPU cores your system has (as opposed to the logical number of cores)."""
+
+    repeat_last_n: Optional[int]
+    """Sets how far back for the model to look back to prevent
+    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+
+    repeat_penalty: Optional[float]
+    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
+    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
+    will be more lenient. (Default: 1.1)"""
+
+    temperature: Optional[float]
+    """The temperature of the model. Increasing the temperature will
+    make the model answer more creatively. (Default: 0.8)"""
+
+    stop: Optional[List[str]]
+    """Sets the stop tokens to use."""
+
+    tfs_z: Optional[float]
+    """Tail free sampling is used to reduce the impact of less probable
+    tokens from the output. A higher value (e.g., 2.0) will reduce the
+    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+
+    top_k: Optional[int]
+    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
+    will give more diverse answers, while a lower value (e.g. 10)
+    will be more conservative. (Default: 40)"""
+
+    top_p: Optional[int]
+    """Works together with top-k. A higher value (e.g., 0.95) will lead
+    to more diverse text, while a lower value (e.g., 0.5) will
+    generate more focused and conservative text. (Default: 0.9)"""
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling Ollama."""
+        return {
+            "model": self.model,
+            "options": {
+                "mirostat": self.mirostat,
+                "mirostat_eta": self.mirostat_eta,
+                "mirostat_tau": self.mirostat_tau,
+                "num_ctx": self.num_ctx,
+                "num_gpu": self.num_gpu,
+                "num_thread": self.num_thread,
+                "repeat_last_n": self.repeat_last_n,
+                "repeat_penalty": self.repeat_penalty,
+                "temperature": self.temperature,
+                "stop": self.stop,
+                "tfs_z": self.tfs_z,
+                "top_k": self.top_k,
+                "top_p": self.top_p,
+            },
+        }
+
+    model_kwargs: Optional[dict] = None
+    """Other model keyword args"""
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters."""
+        return {**{"model": self.model}, **self._default_params}
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    def _process_emb_response(self, input: str) -> List[float]:
+        """Send a prompt to the embeddings endpoint and process the response.
+
+        Args:
+            input: The prompt text to embed.
+
+        Returns:
+            The embedding as a list of floats.
+        """
+        headers = {
+            "Content-Type": "application/json",
+        }
+
+        try:
+            res = requests.post(
+                f"{self.base_url}/api/embeddings",
+                headers=headers,
+                json={"model": self.model, "prompt": input, **self._default_params},
+            )
+        except requests.exceptions.RequestException as e:
+            raise ValueError(f"Error raised by inference endpoint: {e}")
+
+        if res.status_code != 200:
+            raise ValueError(
+                "Error raised by inference API HTTP code: %s, %s"
+                % (res.status_code, res.text)
+            )
+        try:
+            t = res.json()
+            return t["embedding"]
+        except requests.exceptions.JSONDecodeError as e:
+            raise ValueError(
+                f"Error raised by inference API: {e}.\nResponse: {res.text}"
+            )
+
+    def _embed(self, input: List[str]) -> List[List[float]]:
+        embeddings_list: List[List[float]] = []
+        for prompt in input:
+            embeddings = self._process_emb_response(prompt)
+            embeddings_list.append(embeddings)
+
+        return embeddings_list
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Embed documents using an Ollama deployed embedding model.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        instruction_pairs = [f"{self.embed_instruction}{text}" for text in texts]
+        embeddings = self._embed(instruction_pairs)
+        return embeddings
+
+    def embed_query(self, text: str) -> List[float]:
+        """Embed a query using an Ollama deployed embedding model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        instruction_pair = f"{self.query_instruction}{text}"
+        embedding = self._embed([instruction_pair])[0]
+        return embedding
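
For quick reference, here is a minimal usage sketch of the `OllamaEmbeddings` class introduced in this diff, mirroring the notebook cells above. It assumes a local Ollama server listening on the default `http://localhost:11434` and that the `llama2` model has already been pulled (`ollama pull llama2`):

```python
# Minimal sketch of the new OllamaEmbeddings class (assumes a local Ollama
# server on the default port and a pulled "llama2" model).
from langchain.embeddings import OllamaEmbeddings

oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="llama2")

# Embed a single query string -> one vector (List[float]).
query_vector = oembed.embed_query(
    "Llamas are social animals and live with others as a herd."
)

# Embed several documents at once -> one vector per input text.
doc_vectors = oembed.embed_documents(
    [
        "Alpha is the first letter of Greek alphabet",
        "Beta is the second letter of Greek alphabet",
    ]
)

print(len(query_vector), len(doc_vectors))
```

Note that `_embed` loops over the input texts and posts each prompt separately to the server's `/api/embeddings` endpoint, so embedding a large document list issues one HTTP request per text.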