From 5e5e1a22550d96a43c4639126dc76e8785096d89 Mon Sep 17 00:00:00 2001
From: Marko Manninen
Date: Fri, 10 May 2024 01:38:01 +0300
Subject: [PATCH] Removed local embeddings and left Ollama model for new PR.

---
 README.md                          | 14 ------
 examples/ollama/README.md          | 73 ++----
 examples/ollama/package.json       |  3 +-
 examples/ollama/requirements.txt   |  5 --
 examples/ollama/server.py          | 25 ----------
 examples/ollama/src/index.ts       |  6 +--
 src/embeddings/local-embeddings.ts | 31 -------------
 src/index.ts                       |  2 -
 8 files changed, 7 insertions(+), 152 deletions(-)
 delete mode 100644 examples/ollama/requirements.txt
 delete mode 100644 examples/ollama/server.py
 delete mode 100644 src/embeddings/local-embeddings.ts

diff --git a/README.md b/README.md
index c1e538f0..38ede9ed 100644
--- a/README.md
+++ b/README.md
@@ -84,7 +84,6 @@ The author(s) are looking to add core maintainers for this opensource project. R
 - [OpenAI v3 Large](#openai-v3-large)
 - [Ada](#ada)
 - [Cohere](#cohere)
-- [Local embeddings](#local-embeddings)
 - [Use custom embedding model](#use-custom-embedding-model)
 - [More embedding models coming soon](#more-embedding-models-coming-soon)
 - [Vector databases supported](#vector-databases-supported)
@@ -566,19 +565,6 @@ await new RAGApplicationBuilder()
 .setEmbeddingModel(new CohereEmbeddings())
 ```

-## Local embeddings
-
-Run a local server with embed API endpoint that takes 'texts' as a POST action argument, transforms value to a vector representation, and returns a JSON list. Server may utilize Sentence Transformers 'all-MiniLM-L6-v2' model, for instance. The server address with a port and a model parameter count ('384' for 'all-MiniLM-L6-v2') must be provided in the `LocalEmbeddings` constructor.
-
-```TS
-import { LocalEmbeddings } from '@llm-tools/embedjs';
-
-await new RAGApplicationBuilder()
-.setEmbeddingModel(new LocalEmbeddings("http://localhost:5000/embed", 384))
-```
-
-See `examples/ollama` for a complete example.
-
 ## Use custom embedding model

 You can use your own custom embedding model by implementing the `BaseEmbeddings` interface. Here's how that would look like -
diff --git a/examples/ollama/README.md b/examples/ollama/README.md
index 451d916e..e76c1332 100644
--- a/examples/ollama/README.md
+++ b/examples/ollama/README.md
@@ -1,9 +1,8 @@
 ## Requirements

-This example consists of a Python Flask application that handles text embeddings and a Node.js application that uses these embeddings with `embedJs` RAG library.
-
-Main emphasis is on open-source and local running of the RAG application.
+This example consists of a Node.js application that uses vector embeddings with the `embedJs` RAG library to store text from various sources in a database, retrieve it with similarity search, and interpret it with an Ollama LLM.
+The main motivation is the open-source and local running of the RAG application.

 ### Install NodeJS dependencies

@@ -11,64 +10,6 @@ Main emphasis is on open-source and local running of the RAG application.
 npm install
 ```

-**WSL note**
-
-After reinstalling the dependencies, force a rebuild of all native modules to be sure they're compatible with your Linux environment under WSL:
-
-```bash
-npm rebuild --update-binary
-```
-
-### Install Python dependencies
-
-To run verctor embedding server with models supported by `SentenceTransformer`:
-
-```bash
-pip install -r requirements.txt
-```
-
-Be prepared to upgrade some libraries, like huggingface_hub:
-
-```bash
-pip3 install sentence_transformers --upgrade
-```
-
-### Usage
-
-To run the full application (both Flask and Node.js apps), execute the following commands.
-
-Simple start up script run with the default parameters:
-
-```bash
-python server.py
-```
-
-#### Configurations
-
-Windows:
-
-```bash
-$env:FLASK_RUN_PORT="5000"; python server.py --model "all-MiniLM-L6-v2" --port 5000
-```
-
-Linux/Mac:
-
-```bash
-FLASK_RUN_PORT=5000 python server.py --model "all-MiniLM-L6-v2" --port 5000 &
-```
-
-Above line starts embedding server as a background service and needs to be killed manually after running the example.
-
-```bash
-$ sudo lsof -i :5000
-```
-
-->
-
-```bash
-$ sudo kill portNumber
-```
-
 ### Tesla example

 You have to had installed ollama ([https://ollama.com/](https://ollama.com/)) and run at least once:

 ```
 ollama run llama3
 ```

 Run the "Tesla text" retrieval simple example with default parameters:

 ```bash
-npm start
-```
-
-#### Configurations
-
-```bash
-npm start -- "llama3" "http://localhost:5000/embed" 384
+npm start -- llama3
 ```

-That will output similarity search results interpereted by local Ollama llama3 LLM after the content has been first retrieved from internet and indexed to the in-memory vector database.
+That will output similarity search results interpreted by the local Ollama llama3 LLM after the content has first been retrieved from the internet and indexed into the in-memory vector database.
diff --git a/examples/ollama/package.json b/examples/ollama/package.json
index 6b8b4077..f8e0d387 100644
--- a/examples/ollama/package.json
+++ b/examples/ollama/package.json
@@ -4,8 +4,7 @@
     "type": "module",
     "private": true,
     "scripts": {
-        "start": "tsc && node dist/examples/ollama/src/index.js",
-        "start-all": "tsc && ./start-all.sh"
+        "start": "tsc && node dist/examples/ollama/src/index.js"
     },
     "author": "",
     "license": "ISC",
diff --git a/examples/ollama/requirements.txt b/examples/ollama/requirements.txt
deleted file mode 100644
index 1fd16b66..00000000
--- a/examples/ollama/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-Flask==2.0.1
-Werkzeug==2.0.1
-huggingface_hub>=0.8.0
-sentence_transformers>=2.2.0
-Jinja2>=3.0
\ No newline at end of file
diff --git a/examples/ollama/server.py b/examples/ollama/server.py
deleted file mode 100644
index 71e3208a..00000000
--- a/examples/ollama/server.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from flask import Flask, request, jsonify
-from sentence_transformers import SentenceTransformer
-import argparse
-
-app = Flask(__name__)
-
-# Set up command-line argument parsing
-parser = argparse.ArgumentParser(description='Run a Flask server for embedding texts with Sentence Transformers.')
-parser.add_argument('--model', type=str, default='all-MiniLM-L6-v2',
-                    help='Model name for Sentence Transformers (default: all-MiniLM-L6-v2)')
-parser.add_argument('--port', type=int, default=5000,
-                    help='Port number for the Flask server (default: 5000)')
-args = parser.parse_args()
-
-# Initialize the model based on the command-line argument
-model = SentenceTransformer(args.model)
-
-@app.route('/embed', methods=['POST'])
-def embed():
-    texts = request.json['texts']
-    embeddings = model.encode(texts, convert_to_tensor=False, convert_to_numpy=True)
-    return jsonify(embeddings.tolist())
-
-if __name__ == '__main__':
-    app.run(port=args.port)
diff --git a/examples/ollama/src/index.ts b/examples/ollama/src/index.ts
index d2e01780..12c06efc 100644
--- a/examples/ollama/src/index.ts
+++ b/examples/ollama/src/index.ts
@@ -1,12 +1,10 @@
-import { RAGApplicationBuilder, WebLoader, YoutubeLoader, SitemapLoader, Ollama, LocalEmbeddings } from '../../../src/index.js';
+import { RAGApplicationBuilder, WebLoader, YoutubeLoader, SitemapLoader, Ollama, AdaEmbeddings } from '../../../src/index.js';
 import { HNSWDb } from '../../../src/vectorDb/hnswlib-db.js';

 const modelName = process.argv[2] || 'llama3';
-const serverUrl = process.argv[3] || 'http://localhost:5000/embed';
-const dimensions = parseInt(process.argv[4], 10) || 384;

 const llmApplication = await new RAGApplicationBuilder()
-    .setEmbeddingModel(new LocalEmbeddings( serverUrl, dimensions ))
+    .setEmbeddingModel(new AdaEmbeddings())
     .setModel(new Ollama({
         modelName: modelName,
         baseUrl: 'http://localhost:11434'
diff --git a/src/embeddings/local-embeddings.ts b/src/embeddings/local-embeddings.ts
deleted file mode 100644
index ecadf656..00000000
--- a/src/embeddings/local-embeddings.ts
+++ /dev/null
@@ -1,31 +0,0 @@
-import { BaseEmbeddings } from '../interfaces/base-embeddings.js';
-import axios from 'axios';
-
-export class LocalEmbeddings implements BaseEmbeddings {
-    private serverUrl: string;
-    private dimensions: number;
-
-    // Dimensions for 'all-MiniLM-L6-v2' are 384
-    constructor(serverUrl: string = 'http://localhost:5000/embed', dimensions: number = 384) {
-        this.serverUrl = serverUrl;
-        this.dimensions = dimensions;
-    }
-
-    getDimensions(): number {
-        return this.dimensions;
-    }
-
-    async embedDocuments(texts: string[]): Promise<number[][]> {
-        try {
-            const response = await axios.post(this.serverUrl, { texts });
-            return response.data;
-        } catch (error) {
-            console.error('Error embedding documents:', error);
-            throw error;
-        }
-    }
-
-    async embedQuery(text: string): Promise<number[]> {
-        return this.embedDocuments([text]).then(res => res[0]);
-    }
-}
\ No newline at end of file
diff --git a/src/index.ts b/src/index.ts
index bb9ba6ce..938f4990 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -20,7 +20,6 @@ import { AdaEmbeddings } from './embeddings/ada-embeddings.js';
 import { CohereEmbeddings } from './embeddings/cohere-embeddings.js';
 import { OpenAi3LargeEmbeddings } from './embeddings/openai-3large-embeddings.js';
 import { OpenAi3SmallEmbeddings } from './embeddings/openai-3small-embeddings.js';
-import { LocalEmbeddings } from './embeddings/local-embeddings.js';
 import { Mistral } from './models/mistral-model.js';
 import { HuggingFace } from './models/huggingface-model.js';
 import { Anthropic } from './models/anthropic-model.js';
@@ -49,7 +48,6 @@ export {
     CohereEmbeddings,
     OpenAi3LargeEmbeddings,
     OpenAi3SmallEmbeddings,
-    LocalEmbeddings,
     Mistral,
     HuggingFace,
     Anthropic,
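
Anyone who relied on the removed `LocalEmbeddings` class can recreate it through the "Use custom embedding model" path that remains in the README. Below is a minimal sketch reassembled from the deleted `src/embeddings/local-embeddings.ts` above, not part of this patch: the class name `MyLocalEmbeddings` is hypothetical, and it assumes `BaseEmbeddings` is exported from `@llm-tools/embedjs`, as the custom embedding model section of the README implies.

```TS
import { BaseEmbeddings } from '@llm-tools/embedjs';
import axios from 'axios';

// Hypothetical re-creation of the removed LocalEmbeddings class as a custom
// embedding model. It POSTs { texts } to a local embedding server (such as the
// deleted server.py) and expects a JSON list of vectors in response.
export class MyLocalEmbeddings implements BaseEmbeddings {
    // 'all-MiniLM-L6-v2' produces 384-dimensional vectors
    constructor(
        private serverUrl: string = 'http://localhost:5000/embed',
        private dimensions: number = 384,
    ) {}

    getDimensions(): number {
        return this.dimensions;
    }

    async embedDocuments(texts: string[]): Promise<number[][]> {
        // The server returns a JSON array of embedding vectors, one per text
        const response = await axios.post(this.serverUrl, { texts });
        return response.data;
    }

    async embedQuery(text: string): Promise<number[]> {
        const [vector] = await this.embedDocuments([text]);
        return vector;
    }
}
```

It plugs into the builder the same way the removed class did:

```TS
await new RAGApplicationBuilder()
    .setEmbeddingModel(new MyLocalEmbeddings('http://localhost:5000/embed', 384))
```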