Added embeddings support for ollama (langchain-ai#10124)

- Description: Added support for Ollama embeddings - Issue: the issue # it fixes (if applicable), - Dependencies: N/A - Tag maintainer: for a quicker response, tag the relevant maintainer (see below), - Twitter handle: @herrjemand cc ollama/ollama#436
apify · Sep 15, 2023 · 5e50b89 · 5e50b89
1 parent 48a4efc
commit 5e50b89
Show file tree

Hide file tree

Showing 4 changed files with 458 additions and 4 deletions.
diff --git a/docs/extras/integrations/llms/ollama.ipynb b/docs/extras/integrations/llms/ollama.ipynb
@@ -106,6 +106,25 @@
     "llm(\"Tell me about the history of AI\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Ollama supports embeddings via `OllamaEmbeddings`:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings import OllamaEmbeddings\n",
+    "oembed = OllamaEmbeddings(base_url=\"http://localhost:11434\", model=\"llama2\")\n",
+    "\n",
+    "oembed.embed_query(\"Llamas are social animals and live with others as a herd.\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -121,7 +140,7 @@
     "ollama run llama2:13b \n",
     "```\n",
     "\n",
-    "Let's also use local embeddings from `GPT4AllEmbeddings` and `Chroma`."
+    "Let's also use local embeddings from `OllamaEmbeddings` and the `Chroma` vector store."
    ]
   },
   {
@@ -163,9 +182,9 @@
    ],
    "source": [
     "from langchain.vectorstores import Chroma\n",
-    "from langchain.embeddings import GPT4AllEmbeddings\n",
+    "from langchain.embeddings import OllamaEmbeddings\n",
     "\n",
-    "vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())"
+    "vectorstore = Chroma.from_documents(documents=all_splits, embedding=OllamaEmbeddings())"
    ]
   },
   {
@@ -353,7 +372,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.11.5"
   }
  },
  "nbformat": 4,

diff --git a/docs/extras/integrations/text_embedding/ollama.ipynb b/docs/extras/integrations/text_embedding/ollama.ipynb
@@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "278b6c63",
+   "metadata": {},
+   "source": [
+    "# Ollama\n",
+    "\n",
+    "Let's load the Ollama Embeddings class."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0be1af71",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings import OllamaEmbeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2c66e5da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embeddings = OllamaEmbeddings()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "01370375",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = \"This is a test document.\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a42e4035",
+   "metadata": {},
+   "source": [
+    "To generate embeddings, you can either query an individual text, or you can query a list of texts."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "91bc875d-829b-4c3d-8e6f-fc2dda30a3bd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.09996652603149414,\n",
+       " 0.015568195842206478,\n",
+       " 0.17670190334320068,\n",
+       " 0.16521021723747253,\n",
+       " 0.21193109452724457]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query_result = embeddings.embed_query(text)\n",
+    "query_result[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "a4b0d49e-0c73-44b6-aed5-5b426564e085",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.04242777079343796,\n",
+       " 0.016536075621843338,\n",
+       " 0.10052520781755447,\n",
+       " 0.18272875249385834,\n",
+       " 0.2079043835401535]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "doc_result = embeddings.embed_documents([text])\n",
+    "doc_result[0][:5]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bb61bbeb",
+   "metadata": {},
+   "source": [
+    "Let's load the Ollama Embeddings class with a smaller model (e.g. `llama2:7b`). Note: see other supported models at [https://ollama.ai/library](https://ollama.ai/library)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "a56b70f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embeddings = OllamaEmbeddings(model=\"llama2:7b\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "14aefb64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = \"This is a test document.\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "3c39ed33",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_result = embeddings.embed_query(text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "2ee7ce9f-d506-4810-8897-e44334412714",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.09996627271175385,\n",
+       " 0.015567859634757042,\n",
+       " 0.17670205235481262,\n",
+       " 0.16521376371383667,\n",
+       " 0.21193283796310425]"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query_result[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "e3221db6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "doc_result = embeddings.embed_documents([text])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "a0865409-3a6d-468f-939f-abde17c7cac3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.042427532374858856,\n",
+       " 0.01653730869293213,\n",
+       " 0.10052604228258133,\n",
+       " 0.18272635340690613,\n",
+       " 0.20790338516235352]"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "doc_result[0][:5]"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py
@@ -49,6 +49,7 @@
 from langchain.embeddings.mosaicml import MosaicMLInstructorEmbeddings
 from langchain.embeddings.nlpcloud import NLPCloudEmbeddings
 from langchain.embeddings.octoai_embeddings import OctoAIEmbeddings
+from langchain.embeddings.ollama import OllamaEmbeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.embeddings.sagemaker_endpoint import SagemakerEndpointEmbeddings
 from langchain.embeddings.self_hosted import SelfHostedEmbeddings
@@ -106,6 +107,7 @@
     "AwaEmbeddings",
     "HuggingFaceBgeEmbeddings",
     "ErnieEmbeddings",
+    "OllamaEmbeddings",
     "QianfanEmbeddingsEndpoint",
 ]