Merge pull request #119 from llm-tools/monorepo
v0.1.3
adhityan authored Oct 6, 2024
2 parents c4d4781 + 0348134 commit 5d3b526
Showing 59 changed files with 691 additions and 113 deletions.
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"docwriter.style": "JSDoc"
}
19 changes: 19 additions & 0 deletions core/embedjs-interfaces/CHANGELOG.md
@@ -1,3 +1,22 @@
## 0.1.3 (2024-10-06)


### 🚀 Features

- readded local-path and url loaders ([303133c](https://github.com/llm-tools/embedJs/commit/303133c))


### 🩹 Fixes

- exclude examples from release process ([1382185](https://github.com/llm-tools/embedJs/commit/1382185))

- downgrade esbuild version to match nx requirements ([183308f](https://github.com/llm-tools/embedJs/commit/183308f))


### ❤️ Thank You

- Adhityan K V @adhityan

## 0.1.2 (2024-10-06)


2 changes: 1 addition & 1 deletion core/embedjs-interfaces/package.json
@@ -1,6 +1,6 @@
{
"name": "@llm-tools/embedjs-interfaces",
"version": "0.1.2",
"version": "0.1.3",
"description": "Interfaces for extending the embedjs ecosystem",
"dependencies": {
"debug": "^4.3.7",
3 changes: 2 additions & 1 deletion core/embedjs-interfaces/src/interfaces/base-model.ts
@@ -28,13 +28,14 @@ export abstract class BaseModel {
return this._temperature ?? BaseModel.defaultTemperature;
}

// eslint-disable-next-line @typescript-eslint/no-empty-function
public async init(): Promise<void> {}

public async query(
system: string,
userQuery: string,
supportingContext: Chunk[],
conversationId: string = 'default',
conversationId = 'default',
): Promise<QueryResponse> {
const conversation = await BaseModel.conversations.getConversation(conversationId);
this.baseDebug(`${conversation.entries.length} history entries found for conversationId '${conversationId}'`);
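This hunk makes two lint-driven touches: an `eslint-disable` marker for the intentionally empty `init()` body, and the removal of the explicit `string` annotation on `conversationId`, whose type TypeScript already infers from the default value (the kind of redundancy flagged by a rule such as `@typescript-eslint/no-inferrable-types`). A minimal sketch of that inference, using hypothetical names:

```ts
// TypeScript infers `conversationId: string` from its default value,
// so an explicit `: string` annotation would be redundant.
async function query(userQuery: string, conversationId = 'default'): Promise<string> {
    return `[${conversationId}] ${userQuery}`;
}

// Both calls type-check; the second falls back to 'default'.
await query('What is RAG?', 'session-42');
await query('What is RAG?');
```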
19 changes: 19 additions & 0 deletions core/embedjs-utils/CHANGELOG.md
@@ -1,3 +1,22 @@
## 0.1.3 (2024-10-06)


### 🚀 Features

- readded local-path and url loaders ([303133c](https://github.com/llm-tools/embedJs/commit/303133c))


### 🩹 Fixes

- exclude examples from release process ([1382185](https://github.com/llm-tools/embedJs/commit/1382185))

- downgrade esbuild version to match nx requirements ([183308f](https://github.com/llm-tools/embedJs/commit/183308f))


### ❤️ Thank You

- Adhityan K V @adhityan

## 0.1.2 (2024-10-06)


4 changes: 2 additions & 2 deletions core/embedjs-utils/package.json
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-utils",
"version": "0.1.2",
"version": "0.1.3",
"description": "Useful util functions when extending the embedjs ecosystem",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.2"
"@llm-tools/embedjs-interfaces": "0.1.3"
},
"type": "module",
"main": "./src/index.js",
19 changes: 19 additions & 0 deletions core/embedjs/CHANGELOG.md
@@ -1,3 +1,22 @@
## 0.1.3 (2024-10-06)


### 🚀 Features

- readded local-path and url loaders ([303133c](https://github.com/llm-tools/embedJs/commit/303133c))


### 🩹 Fixes

- exclude examples from release process ([1382185](https://github.com/llm-tools/embedJs/commit/1382185))

- downgrade esbuild version to match nx requirements ([183308f](https://github.com/llm-tools/embedJs/commit/183308f))


### ❤️ Thank You

- Adhityan K V @adhityan

## 0.1.2 (2024-10-06)


6 changes: 3 additions & 3 deletions core/embedjs/package.json
@@ -1,11 +1,11 @@
{
"type": "module",
"name": "@llm-tools/embedjs",
"version": "0.1.2",
"version": "0.1.3",
"description": "A NodeJS RAG framework to easily work with LLMs and custom datasets",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.2",
"@llm-tools/embedjs-utils": "0.1.2",
"@llm-tools/embedjs-interfaces": "0.1.3",
"@llm-tools/embedjs-utils": "0.1.3",
"axios": "^1.7.7",
"debug": "^4.3.7",
"langchain": "^0.3.2",
37 changes: 29 additions & 8 deletions core/embedjs/src/core/rag-application-builder.ts
@@ -6,19 +6,20 @@ import {
BaseEmbeddings,
BaseLoader,
BaseModel,
SIMPLE_MODELS,
} from '@llm-tools/embedjs-interfaces';

export class RAGApplicationBuilder {
private temperature: number;
private queryTemplate: string;
private model: BaseModel | SIMPLE_MODELS | null;
private loaders: BaseLoader[];
private vectorDb: BaseDb;
private cache?: BaseCache;
private model: BaseModel | null;
private conversations: BaseConversation;
private queryTemplate: string;
private searchResultCount: number;
private embeddingModel: BaseEmbeddings;
private embeddingRelevanceCutOff: number;
private loaders: BaseLoader[];
private vectorDb: BaseDb;
private conversations: BaseConversation;

constructor() {
this.loaders = [];
@@ -33,9 +34,13 @@ export class RAGApplicationBuilder {
this.embeddingRelevanceCutOff = 0;
}

/**
* The `build` function creates a new `RAGApplication` entity and initializes it asynchronously based on provided parameters.
* @returns An instance of the `RAGApplication` class after it has been initialized asynchronously.
*/
async build() {
const entity = new RAGApplication(this);
await entity.init();
await entity.init(this);
return entity;
}

@@ -44,11 +49,23 @@
return this;
}

/**
* The setSearchResultCount function sets the search result count
* @param {number} searchResultCount - The `searchResultCount` parameter
* represents the count of search results picked up from the vector store per query.
* @returns The `this` object is being returned, which allows for method chaining.
*/
setSearchResultCount(searchResultCount: number) {
this.searchResultCount = searchResultCount;
return this;
}

/**
* The function setVectorDb sets a BaseDb object
* @param {BaseDb} vectorDb - The `vectorDb` parameter is an instance of the `BaseDb` class, which
* is used to store vectors in a database.
* @returns The `this` object is being returned, which allows for method chaining.
*/
setVectorDb(vectorDb: BaseDb) {
this.vectorDb = vectorDb;
return this;
@@ -83,9 +100,13 @@
return this;
}

setModel(model: 'NO_MODEL' | BaseModel) {
setModel(model: 'NO_MODEL' | SIMPLE_MODELS | BaseModel) {
if (typeof model === 'object') this.model = model;
else this.model = null;
else {
if (model === 'NO_MODEL') this.model = null;
else this.model = model;
}

return this;
}

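With `setModel` now accepting `SIMPLE_MODELS` values, the builder can defer model construction until `RAGApplication.init()` runs. A hypothetical end-to-end sketch of the fluent API, assuming the usual top-level exports and a `setEmbeddingModel` setter matching the `embeddingModel` field above; the adapter imports are placeholders:

```ts
import { RAGApplicationBuilder } from '@llm-tools/embedjs';
import { SIMPLE_MODELS } from '@llm-tools/embedjs-interfaces';
// Placeholder adapters: substitute the concrete vector-db and
// embedding packages installed in your project.
import { MyVectorDb } from './my-vector-db.js';
import { MyEmbeddings } from './my-embeddings.js';

// `build()` now hands the builder to `RAGApplication.init(this)`, which
// resolves the SIMPLE_MODELS value into a concrete model at startup.
const app = await new RAGApplicationBuilder()
    .setModel(SIMPLE_MODELS.OPENAI_GPT4_O)
    .setVectorDb(new MyVectorDb())
    .setEmbeddingModel(new MyEmbeddings())
    .setSearchResultCount(7)
    .build();
```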
108 changes: 68 additions & 40 deletions core/embedjs/src/core/rag-application.ts
@@ -14,75 +14,53 @@ import {
InsertChunkData,
LoaderChunk,
QueryResponse,
SIMPLE_MODELS,
} from '@llm-tools/embedjs-interfaces';
import { cleanString, getUnique } from '@llm-tools/embedjs-utils';

export class RAGApplication {
private readonly debug = createDebugMessages('embedjs:core');
private readonly queryTemplate: string;
private readonly embeddingRelevanceCutOff: number;
private readonly searchResultCount: number;
private readonly queryTemplate: string;
private readonly cache?: BaseCache;
private readonly vectorDb: BaseDb;
private readonly model: BaseModel;
private readonly embeddingRelevanceCutOff: number;

private readonly rawLoaders: BaseLoader[];
private loaders: BaseLoader[];
private model: BaseModel;

constructor(llmBuilder: RAGApplicationBuilder) {
this.cache = llmBuilder.getCache();
BaseLoader.setCache(this.cache);

this.model = llmBuilder.getModel();
if (!this.model) this.debug('No base model set; query function unavailable!');
else {
BaseModel.setDefaultTemperature(llmBuilder.getTemperature());
BaseModel.setConversations(llmBuilder.getConversationsEngine() || new InMemoryConversation());
}

this.queryTemplate = cleanString(llmBuilder.getQueryTemplate());
this.debug(`Using system query template - "${this.queryTemplate}"`);

this.vectorDb = llmBuilder.getVectorDb();
if (!this.vectorDb) throw new SyntaxError('VectorDb not set');

this.rawLoaders = llmBuilder.getLoaders();
this.searchResultCount = llmBuilder.getSearchResultCount();
this.embeddingRelevanceCutOff = llmBuilder.getEmbeddingRelevanceCutOff();

RAGEmbedding.init(llmBuilder.getEmbeddingModel());
}

/**
* The function `embedChunks` embeds the content of chunks by invoking the planned embedding model.
* @param {Pick<Chunk, 'pageContent'>[]} chunks - The `chunks` parameter is an array of objects
* that have a property `pageContent` which contains text content for each chunk.
* @returns The `embedChunks` function is returning the embedded vectors for the chunks.
*/
private async embedChunks(chunks: Pick<Chunk, 'pageContent'>[]) {
const texts = chunks.map(({ pageContent }) => pageContent);
return RAGEmbedding.getEmbedding().embedDocuments(texts);
}

/**
* The function `getChunkUniqueId` generates a unique identifier by combining a loader unique ID and
* an increment ID.
* @param {string} loaderUniqueId - A unique identifier for the loader.
* @param {number} incrementId - The `incrementId` parameter is a number that represents the
* increment value used to generate a unique chunk identifier.
* @returns The function `getChunkUniqueId` returns a string that combines the `loaderUniqueId` and
* `incrementId`.
* The function initializes the various components of the application from the provided builder
* configuration. This is an internal method and does not need to be invoked manually.
* @param {RAGApplicationBuilder} llmBuilder - An instance of the `RAGApplicationBuilder` class
* holding the model, loaders, vector database and other settings from which the application
* is initialized.
*/
private getChunkUniqueId(loaderUniqueId: string, incrementId: number) {
return `${loaderUniqueId}_${incrementId}`;
}
public async init(llmBuilder: RAGApplicationBuilder) {
this.model = await this.getModel(llmBuilder.getModel());
if (!this.model) this.debug('No base model set; query function unavailable!');
else {
BaseModel.setDefaultTemperature(llmBuilder.getTemperature());
BaseModel.setConversations(llmBuilder.getConversationsEngine() || new InMemoryConversation());
}

/**
* This async function initializes various components such as loaders, model, vector database,
* cache, and pre-loaders.
*/
public async init() {
this.loaders = this.rawLoaders;
this.loaders = llmBuilder.getLoaders();

if (this.model) {
await this.model.init();
@@ -104,6 +82,56 @@
this.debug('Initialized pre-loaders');
}

/**
* The function getModel resolves the configured model into a concrete BaseModel instance.
* @param {BaseModel | SIMPLE_MODELS | null} model - Either a concrete `BaseModel` instance, a
* value from the `SIMPLE_MODELS` enum, or `null`.
* @returns The `getModel` function returns a Promise that resolves to a `BaseModel` object. If the
* `model` parameter is an object, it returns the object itself. If the `model` parameter is
* `null`, it returns `null`. If the `model` parameter is a specific value from the `SIMPLE_MODELS`
* enum, it creates a new `BaseModel` object based on the model name.
*/
private async getModel(model: BaseModel | SIMPLE_MODELS | null): Promise<BaseModel> {
if (typeof model === 'object') return model;
else if (model === null) return null;
else {
const { OpenAi } = await import('@llm-tools/embedjs-openai').catch(() => {
throw new Error('Package `@llm-tools/embedjs-openai` needs to be installed to use OpenAI models');
});
this.debug('Dynamically imported OpenAi');

if (model === SIMPLE_MODELS.OPENAI_GPT4_O) return new OpenAi({ modelName: 'gpt-4o' });
else if (model === SIMPLE_MODELS['OPENAI_GPT4_TURBO']) return new OpenAi({ modelName: 'gpt-4-turbo' });
else if (model === SIMPLE_MODELS['OPENAI_GPT3.5_TURBO']) return new OpenAi({ modelName: 'gpt-3.5-turbo' });
else throw new Error('Invalid model name');
}
}

/**
* The function `embedChunks` embeds the content of chunks by invoking the planned embedding model.
* @param {Pick<Chunk, 'pageContent'>[]} chunks - The `chunks` parameter is an array of objects
* that have a property `pageContent` which contains text content for each chunk.
* @returns The `embedChunks` function is returning the embedded vectors for the chunks.
*/
private async embedChunks(chunks: Pick<Chunk, 'pageContent'>[]) {
const texts = chunks.map(({ pageContent }) => pageContent);
return RAGEmbedding.getEmbedding().embedDocuments(texts);
}

/**
* The function `getChunkUniqueId` generates a unique identifier by combining a loader unique ID and
* an increment ID.
* @param {string} loaderUniqueId - A unique identifier for the loader.
* @param {number} incrementId - The `incrementId` parameter is a number that represents the
* increment value used to generate a unique chunk identifier.
* @returns The function `getChunkUniqueId` returns a string that combines the `loaderUniqueId` and
* `incrementId`.
*/
private getChunkUniqueId(loaderUniqueId: string, incrementId: number) {
return `${loaderUniqueId}_${incrementId}`;
}

/**
* The function `addLoader` asynchronously initializes a loader using the provided parameters and adds
* it to the system.
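The dynamic `import()` inside `getModel` keeps `@llm-tools/embedjs-openai` an optional dependency: it is loaded only when a `SIMPLE_MODELS` value is actually used, and a missing install surfaces as a descriptive error rather than a hard failure at startup. The if/else chain mapping enum values to model names could equally be written as a lookup table; a sketch, assuming `SIMPLE_MODELS` is a plain TypeScript enum:

```ts
import { SIMPLE_MODELS } from '@llm-tools/embedjs-interfaces';

// Equivalent lookup-table form of the if/else chain in getModel().
// Keying a Record on the enum keeps the mapping exhaustive: adding a
// new SIMPLE_MODELS member without a model name is a compile error.
const MODEL_NAMES: Record<SIMPLE_MODELS, string> = {
    [SIMPLE_MODELS.OPENAI_GPT4_O]: 'gpt-4o',
    [SIMPLE_MODELS.OPENAI_GPT4_TURBO]: 'gpt-4-turbo',
    [SIMPLE_MODELS['OPENAI_GPT3.5_TURBO']]: 'gpt-3.5-turbo',
};
```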