code documentation

llm-tools · May 30, 2024 · 0ca2330 · 0ca2330
1 parent 10cf3a7
commit 0ca2330
Show file tree

Hide file tree

Showing 4 changed files with 179 additions and 2 deletions.
diff --git a/src/core/dynamic-loader-selector.ts b/src/core/dynamic-loader-selector.ts
@@ -37,9 +37,22 @@ export type LoaderParam =
     | ({ type: 'LocalPath' } & ConstructorParameters<typeof LocalPathLoader>[0])
     | ({ type: 'Url' } & ConstructorParameters<typeof UrlLoader>[0]);
 
+/**
+ * This class generates different types of loaders based on a string input.
+ */
 export class DynamicLoader {
     private static readonly debug = createDebugMessages('embedjs:DynamicLoader');
 
+    /**
+     * The function `unfurlLoader` determines the type of loader based on the input string and returns
+     * the corresponding loader object.
+     * @param {string} loader - The `loader` parameter in the `unfurlLoader` function is a string that
+     * represents the source from which data will be loaded. It can be a URL, a local file path, a JSON
+     * string, or a YouTube video ID. The function checks the type of loader and returns a matching loader.
+     * @returns The function `unfurlLoader` returns an instance of a subclass of `BaseLoader` based on
+     * the type of input `loader` provided. The possible return types are `UrlLoader`,
+     * `LocalPathLoader`, `JsonLoader`, or `YoutubeLoader`.
+     */
     private static async unfurlLoader(loader: string): Promise<BaseLoader> {
         if (isValidURL(loader)) {
             DynamicLoader.debug('Loader is a valid URL!');
@@ -58,6 +71,16 @@ export class DynamicLoader {
         }
     }
 
+    /**
+     * The function `createLoader` dynamically creates and returns a loader object based on the input provided.
+     * @param {LoaderParam} loader - The `createLoader` function is designed to create a loader based
+     * on the input provided. The `loader` parameter can be of type `string`, an instance of
+     * `BaseLoader`, or an object with a `type` property specifying the type of loader to create.
+     * @returns The `createLoader` function returns a Promise that resolves to an instance of a
+     * specific type of loader based on the input `loader` parameter. The function checks the type of
+     * the `loader` parameter and returns different loader instances based on the type or properties of
+     * the input.
+     */
     public static async createLoader(loader: LoaderParam): Promise<BaseLoader> {
         if (typeof loader === 'string') {
             DynamicLoader.debug('Loader is of type string; unfurling');
@@ -109,7 +132,14 @@ export class DynamicLoader {
         throw new SyntaxError(`Unknown loader ${loader}`);
     }
 
+    /**
+     * The function `createLoaders` asynchronously creates multiple loaders using the provided
+     * parameters and returns them as an array.
+     * @param {LoaderParam[]} loaders - An array of LoaderParam objects.
+     * @returns A Promise that resolves to an array of BaseLoader objects, one for each entry in
+     * `loaders`, in the same order.
+     */
     public static async createLoaders(loaders: LoaderParam[]): Promise<BaseLoader[]> {
-        return (await Promise.all(loaders.map(DynamicLoader.createLoader))).flat(1);
+        return await Promise.all(loaders.map(DynamicLoader.createLoader));
     }
 }
diff --git a/src/core/rag-application.ts b/src/core/rag-application.ts
@@ -45,15 +45,34 @@ export class RAGApplication {
         if (!this.vectorDb) throw new SyntaxError('VectorDb not set');
     }
 
+    /**
+     * The function `embedChunks` embeds the content of chunks by invoking the configured embedding model.
+     * @param {Pick<Chunk, 'pageContent'>[]} chunks - The `chunks` parameter is an array of objects
+     * that have a property `pageContent` which contains text content for each chunk.
+     * @returns The `embedChunks` function is returning the embedded vectors for the chunks.
+     */
     private async embedChunks(chunks: Pick<Chunk, 'pageContent'>[]) {
         const texts = chunks.map(({ pageContent }) => pageContent);
         return RAGEmbedding.getEmbedding().embedDocuments(texts);
     }
 
+   /**
+    * The function `getChunkUniqueId` generates a unique identifier by combining a loader unique ID and
+    * an increment ID.
+    * @param {string} loaderUniqueId - A unique identifier for the loader.
+    * @param {number} incrementId - The `incrementId` parameter is a number that represents the
+    * increment value used to generate a unique chunk identifier.
+    * @returns The function `getChunkUniqueId` returns a string that combines the `loaderUniqueId` and
+    * `incrementId`.
+    */
     private getChunkUniqueId(loaderUniqueId: string, incrementId: number) {
         return `${loaderUniqueId}_${incrementId}`;
     }
 
+    /**
+     * This async function initializes various components such as loaders, model, vector database,
+     * cache, and pre-loaders.
+     */
     public async init() {
         this.loaders = await DynamicLoader.createLoaders(this.rawLoaders);
 
@@ -75,11 +94,31 @@ export class RAGApplication {
         this.debug('Initialized pre-loaders');
     }
 
+   /**
+    * The function `addLoader` asynchronously initializes a loader using the provided parameters and adds
+    * it to the system.
+    * @param {LoaderParam} loaderParam - The `loaderParam` parameter is a string, object or instance of BaseLoader
+    * that contains the necessary information to create a loader.
+    * @returns The function `addLoader` returns an object with the following properties:
+    * - `entriesAdded`: Number of new entries added during the loader operation
+    * - `uniqueId`: Unique identifier of the loader
+    * - `loaderType`: Name of the loader's constructor class
+    */
     public async addLoader(loaderParam: LoaderParam): Promise<AddLoaderReturn> {
         const loader = await DynamicLoader.createLoader(loaderParam);
         return this._addLoader(loader);
     }
-
+
+    /**
+     * The function `_addLoader` asynchronously adds a loader, processes its chunks, and handles
+     * incremental loading if supported by the loader.
+     * @param {BaseLoader} loader - The `loader` parameter in the `_addLoader` method is an instance of the
+     * `BaseLoader` class.
+     * @returns The function `_addLoader` returns an object with the following properties:
+     * - `entriesAdded`: Number of new entries added during the loader operation
+     * - `uniqueId`: Unique identifier of the loader
+     * - `loaderType`: Name of the loader's constructor class
+     */
     private async _addLoader(loader: BaseLoader): Promise<AddLoaderReturn> {
         const uniqueId = loader.getUniqueId();
         this.debug('Add loader called for', uniqueId);
@@ -114,12 +153,29 @@ export class RAGApplication {
         return { entriesAdded: newInserts, uniqueId, loaderType: loader.constructor.name };
     }
 
+   /**
+    * The `incrementalLoader` function asynchronously processes incremental chunks for a loader.
+    * @param {string} uniqueId - The `uniqueId` parameter is a string that serves as an identifier for
+    * the loader.
+    * @param incrementalGenerator - The `incrementalGenerator` parameter is an asynchronous generator
+    * function that yields `LoaderChunk` objects. It is used to incrementally load chunks of data for a specific loader.
+    */
     private async incrementalLoader(uniqueId: string, incrementalGenerator: AsyncGenerator<LoaderChunk, void, void>) {
         this.debug(`incrementalChunkAvailable for loader`, uniqueId);
         const { newInserts } = await this.batchLoadChunks(uniqueId, incrementalGenerator);
         this.debug(`${newInserts} new incrementalChunks processed`, uniqueId);
     }
 
+    /**
+     * The function `deleteLoader` deletes embeddings from a loader after confirming the action.
+     * @param {string} uniqueLoaderId - The `uniqueLoaderId` parameter is a string that represents the
+     * identifier of the loader that you want to delete.
+     * @param {boolean} [areYouSure=false] - The `areYouSure` parameter is a boolean flag that
+     * indicates whether the user has confirmed their intention to delete embeddings from a loader. If
+     * `areYouSure` is `true`, the function proceeds with the deletion process. If `areYouSure` is
+     * `false`, a warning message is logged and no action is taken.
+     * @returns The `deleteLoader` method returns a boolean value indicating the success of the operation.
+     */
     public async deleteLoader(uniqueLoaderId: string, areYouSure: boolean = false) {
         if (!areYouSure) {
             console.warn('Delete embeddings from loader called without confirmation. No action taken.');
@@ -132,6 +188,17 @@ export class RAGApplication {
         return deleteResult;
     }
 
+    /**
+     * The function `batchLoadChunks` processes chunks of data in batches and formats them for insertion.
+     * @param {string} uniqueId - The `uniqueId` parameter is a string that represents a unique
+     * identifier for the loader being processed.
+     * @param incrementalGenerator - The `incrementalGenerator` parameter in the `batchLoadChunks`
+     * function is an asynchronous generator that yields `LoaderChunk` objects.
+     * @returns The `batchLoadChunks` function returns an object with two properties:
+     * 1. `newInserts`: The total number of new inserts made during the batch loading process.
+     * 2. `formattedChunks`: An array containing the formatted chunks that were processed during the
+     * batch loading process.
+     */
     private async batchLoadChunks(uniqueId: string, incrementalGenerator: AsyncGenerator<LoaderChunk, void, void>) {
         let i = 0,
             batchSize = 0,
@@ -162,6 +229,18 @@ export class RAGApplication {
         return { newInserts, formattedChunks };
     }
 
+    /**
+     * The function `batchLoadEmbeddings` asynchronously loads embeddings for formatted chunks and
+     * inserts them into a vector database.
+     * @param {string} loaderUniqueId - The `loaderUniqueId` parameter is a unique identifier for the
+     * loader that is used to load embeddings.
+     * @param {Chunk[]} formattedChunks - `formattedChunks` is an array of Chunk objects that contain
+     * page content, metadata, and other information needed for processing. The `batchLoadEmbeddings`
+     * function processes these chunks in batches to obtain embeddings for each chunk and then inserts
+     * them into a database for further use.
+     * @returns The function `batchLoadEmbeddings` returns the result of inserting the embed chunks
+     * into the vector database.
+     */
     private async batchLoadEmbeddings(loaderUniqueId: string, formattedChunks: Chunk[]) {
         if (formattedChunks.length === 0) return 0;
 
@@ -179,10 +258,26 @@ export class RAGApplication {
         return this.vectorDb.insertChunks(embedChunks);
     }
 
+    /**
+     * The function `getEmbeddingsCount` returns the count of embeddings stored in a vector database
+     * asynchronously.
+     * @returns The `getEmbeddingsCount` method is returning the number of embeddings stored in the
+     * vector database. It is an asynchronous function that returns a Promise with the count of
+     * embeddings as a number.
+     */
     public async getEmbeddingsCount(): Promise<number> {
         return this.vectorDb.getVectorCount();
     }
 
+    /**
+     * The function `deleteAllEmbeddings` deletes all embeddings from the vector database if a
+     * confirmation is provided.
+     * @param {boolean} [areYouSure=false] - The `areYouSure` parameter is a boolean parameter that
+     * serves as a confirmation flag. It is used to ensure that the deletion of all embeddings is
+     * intentional and requires the caller to explicitly confirm by passing `true` as the value. If
+     * `areYouSure` is `false`, a warning message is logged.
+     * @returns The `deleteAllEmbeddings` function returns a boolean value indicating the result.
+     */
     public async deleteAllEmbeddings(areYouSure: boolean = false) {
         if (!areYouSure) {
             console.warn('Reset embeddings called without confirmation. No action taken.');
@@ -193,6 +288,17 @@ export class RAGApplication {
         return true;
     }
 
+   /**
+    * The function `getEmbeddings` retrieves embeddings for a query, performs similarity search,
+    * filters and sorts the results based on relevance score, and returns a subset of the top results.
+    * @param {string} cleanQuery - The `cleanQuery` parameter is a string that represents the query
+    * input after it has been cleaned or processed to remove any unnecessary characters, symbols, or
+    * noise. This clean query is then used to generate embeddings for similarity search.
+    * @returns The `getEmbeddings` function returns a filtered and sorted array of search results based
+    * on the similarity score of the query embedded in the cleanQuery string. The results are filtered
+    * based on a relevance cutoff value, sorted in descending order of score, and then sliced to return
+    * only the number of results specified by the `searchResultCount` property.
+    */
     public async getEmbeddings(cleanQuery: string) {
         const queryEmbedded = await RAGEmbedding.getEmbedding().embedQuery(cleanQuery);
         const unfilteredResultSet = await this.vectorDb.similaritySearch(queryEmbedded, this.searchResultCount + 10);
@@ -203,13 +309,34 @@ export class RAGApplication {
             .slice(0, this.searchResultCount);
     }
 
+    /**
+     * The getContext function retrieves the unique embeddings for a given query without calling an LLM.
+     * @param {string} query - The `query` parameter is a string that represents the input query that
+     * needs to be processed.
+     * @returns An array of unique page content items / chunks.
+     */
     public async getContext(query: string) {
         const cleanQuery = cleanString(query);
         const rawContext = await this.getEmbeddings(cleanQuery);
 
         return [...new Map(rawContext.map((item) => [item.pageContent, item])).values()];
     }
 
+    /**
+     * This function takes a user query, retrieves relevant context, identifies unique sources, and
+     * returns the query result along with the list of sources.
+     * @param {string} userQuery - The `userQuery` parameter is a string that represents the query
+     * input provided by the user. It is used as input to retrieve context and ultimately generate a
+     * result based on the query.
+     * @param {string} [conversationId] - The `conversationId` parameter in the `query` method is an
+     * optional parameter that represents the unique identifier for a conversation. It allows you to
+     * track and associate the query with a specific conversation thread if needed. If provided, it can be
+     * used to maintain context or history related to the conversation.
+     * @returns The `query` method returns a Promise that resolves to an object with two properties:
+     * `result` and `sources`. The `result` property is a string representing the result of querying
+     * the LLM model with the provided query template, user query, context, and conversation history. The
+     * `sources` property is an array of strings representing unique sources used to generate the LLM response.
+     */
     public async query(
         userQuery: string,
         conversationId?: string,

diff --git a/src/interfaces/base-loader.ts b/src/interfaces/base-loader.ts
@@ -73,6 +73,10 @@ export abstract class BaseLoader<
         this.emit('incrementalChunkAvailable', incrementalGenerator);
     }
 
+   /**
+    * This TypeScript function asynchronously processes chunks of data, cleans up the content,
+    * calculates a content hash, and yields the modified chunks.
+    */
     public async *getChunks(): AsyncGenerator<LoaderChunk<T>, void, void> {
         const chunks = await this.getUnfilteredChunks();
 

diff --git a/src/interfaces/base-model.ts b/src/interfaces/base-model.ts
@@ -23,6 +23,22 @@ export abstract class BaseModel {
 
     public async init(): Promise<void> {}
 
+    /**
+     * The query function asynchronously processes user queries mixing references from a vector database
+     * and maintains the conversation history.
+     * @param {string} system - This is the system prompt passed to the LLM.
+     * @param {string} userQuery - The `userQuery` parameter in the `query` method represents the query
+     * or question inputted by the user that the system will process and provide a response to. 
+     * @param {Chunk[]} supportingContext - The `supportingContext` parameter in the `query` method is
+     * an array of `Chunk` objects. Each `Chunk` object typically contains information or context
+     * relevant to the user query being processed. The `supportingContext` is used to provide
+     * additional RAG context to the system when running the query.
+     * @param {string} [conversationId=default] - The `conversationId` parameter in the `query` method
+     * is a unique identifier for a conversation. It is used to keep track of the conversation history
+     * and context for each conversation. If a conversation with the specified `conversationId` does
+     * not exist in the `conversationMap`, a new entry is created.
+     * @returns The `query` method returns a Promise that resolves to a string with the LLM response.
+     */
     public async query(
         system: string,
         userQuery: string,