Skip to content

Commit

Permalink
code documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
adhityan committed May 30, 2024
1 parent 10cf3a7 commit 0ca2330
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 2 deletions.
32 changes: 31 additions & 1 deletion src/core/dynamic-loader-selector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,22 @@ export type LoaderParam =
| ({ type: 'LocalPath' } & ConstructorParameters<typeof LocalPathLoader>[0])
| ({ type: 'Url' } & ConstructorParameters<typeof UrlLoader>[0]);

/**
* This class generates different types of loaders based on a string input.
*/
export class DynamicLoader {
private static readonly debug = createDebugMessages('embedjs:DynamicLoader');

/**
* The function `unfurlLoader` determines the type of loader based on the input string and returns
* the corresponding loader object.
* @param {string} loader - The `loader` parameter in the `unfurlLoader` function is a string that
* represents the source from which data will be loaded. It can be a URL, a local file path, a JSON
* string, or a YouTube video ID. The function checks the type of loader and returns an appropriate
* loader instance.
* @returns The function `unfurlLoader` returns an instance of a subclass of `BaseLoader` based on
* the type of input `loader` provided. The possible return types are `UrlLoader`,
* `LocalPathLoader`, `JsonLoader`, or `YoutubeLoader`.
*/
private static async unfurlLoader(loader: string): Promise<BaseLoader> {
if (isValidURL(loader)) {
DynamicLoader.debug('Loader is a valid URL!');
Expand All @@ -58,6 +71,16 @@ export class DynamicLoader {
}
}

/**
* The function `createLoader` dynamically creates and returns a loader object based on the input provided.
* @param {LoaderParam} loader - The `createLoader` function is designed to create a loader based
* on the input provided. The `loader` parameter can be of type `string`, an instance of
* `BaseLoader`, or an object with a `type` property specifying the type of loader to create.
* @returns The `createLoader` function returns a Promise that resolves to an instance of a
* specific type of loader based on the input `loader` parameter. The function checks the type of
* the `loader` parameter and returns different loader instances based on the type or properties of
* the input.
*/
public static async createLoader(loader: LoaderParam): Promise<BaseLoader> {
if (typeof loader === 'string') {
DynamicLoader.debug('Loader is of type string; unfurling');
Expand Down Expand Up @@ -109,7 +132,14 @@ export class DynamicLoader {
throw new SyntaxError(`Unknown loader ${loader}`);
}

/**
 * Creates one loader per entry of `loaders` by delegating each entry to
 * `DynamicLoader.createLoader`; all creations are started together and awaited as a group.
 * @param {LoaderParam[]} loaders - The loader descriptions (string, `BaseLoader` instance or typed
 * object) to resolve.
 * @returns A promise resolving to the created loaders, in the same order as the `loaders` input.
 * The promise rejects if any single loader creation rejects.
 */
public static async createLoaders(loaders: LoaderParam[]): Promise<BaseLoader[]> {
    // Go through an arrow function so `createLoader` receives only the loader itself, not the
    // extra index/array arguments that `Array.prototype.map` passes to its callback.
    return await Promise.all(loaders.map((loader) => DynamicLoader.createLoader(loader)));
}
}
129 changes: 128 additions & 1 deletion src/core/rag-application.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,34 @@ export class RAGApplication {
if (!this.vectorDb) throw new SyntaxError('VectorDb not set');
}

/**
 * Computes embeddings for a list of chunks.
 * @param {Pick<Chunk, 'pageContent'>[]} chunks - Objects carrying the `pageContent` text to embed;
 * no other chunk property is read.
 * @returns Whatever `embedDocuments` of the embedding obtained from `RAGEmbedding.getEmbedding()`
 * returns for the extracted texts.
 */
private async embedChunks(chunks: Pick<Chunk, 'pageContent'>[]) {
    // Only the raw text of each chunk is handed to the embedding model.
    const pageContents = chunks.map((chunk) => chunk.pageContent);
    const embeddingModel = RAGEmbedding.getEmbedding();

    return embeddingModel.embedDocuments(pageContents);
}

/**
 * Builds the identifier of a single chunk from its loader's identifier and a running number.
 * @param {string} loaderUniqueId - Identifier of the loader the chunk belongs to.
 * @param {number} incrementId - Number distinguishing this chunk from the loader's other chunks.
 * @returns The string `<loaderUniqueId>_<incrementId>`.
 */
private getChunkUniqueId(loaderUniqueId: string, incrementId: number) {
    const separator = '_';
    return loaderUniqueId + separator + incrementId;
}

/**
* This async function initializes various components such as loaders, model, vector database,
* cache, and pre-loaders.
*/
public async init() {
this.loaders = await DynamicLoader.createLoaders(this.rawLoaders);

Expand All @@ -75,11 +94,31 @@ export class RAGApplication {
this.debug('Initialized pre-loaders');
}

/**
 * Resolves `loaderParam` into a loader instance through `DynamicLoader.createLoader` and then
 * registers that instance via `_addLoader`.
 * @param {LoaderParam} loaderParam - A string, a typed object or a `BaseLoader` instance
 * describing the loader to add.
 * @returns The result of `_addLoader` for the resolved loader, an object with:
 * - `entriesAdded`: Number of new entries added during the loader operation
 * - `uniqueId`: Unique identifier of the loader
 * - `loaderType`: Name of the loader's constructor class
 */
public async addLoader(loaderParam: LoaderParam): Promise<AddLoaderReturn> {
    const resolvedLoader = await DynamicLoader.createLoader(loaderParam);

    return this._addLoader(resolvedLoader);
}


/**
* The function `_addLoader` asynchronously adds a loader, processes its chunks, and handles
* incremental loading if supported by the loader.
* @param {BaseLoader} loader - The `loader` parameter in the `_addLoader` method is an instance of the
* `BaseLoader` class.
* @returns The function `_addLoader` returns an object with the following properties:
* - `entriesAdded`: Number of new entries added during the loader operation
* - `uniqueId`: Unique identifier of the loader
* - `loaderType`: Name of the loader's constructor class
*/
private async _addLoader(loader: BaseLoader): Promise<AddLoaderReturn> {
const uniqueId = loader.getUniqueId();
this.debug('Add loader called for', uniqueId);
Expand Down Expand Up @@ -114,12 +153,29 @@ export class RAGApplication {
return { entriesAdded: newInserts, uniqueId, loaderType: loader.constructor.name };
}

/**
 * Processes the chunks a loader makes available incrementally by feeding them to
 * `batchLoadChunks`, logging a debug message before and after.
 * @param {string} uniqueId - Identifier of the loader the incremental chunks belong to.
 * @param incrementalGenerator - Async generator yielding the `LoaderChunk` objects to process.
 * @returns A promise that resolves with no value once the chunks have been processed.
 */
private async incrementalLoader(uniqueId: string, incrementalGenerator: AsyncGenerator<LoaderChunk, void, void>) {
    this.debug(`incrementalChunkAvailable for loader`, uniqueId);

    const batchResult = await this.batchLoadChunks(uniqueId, incrementalGenerator);
    this.debug(`${batchResult.newInserts} new incrementalChunks processed`, uniqueId);
}

/**
* The function `deleteLoader` deletes embeddings from a loader after confirming the action.
* @param {string} uniqueLoaderId - The `uniqueLoaderId` parameter is a string that represents the
* identifier of the loader that you want to delete.
* @param {boolean} [areYouSure=false] - The `areYouSure` parameter is a boolean flag that
* indicates whether the user has confirmed their intention to delete embeddings from a loader. If
* `areYouSure` is `true`, the function proceeds with the deletion process. If `areYouSure` is
* `false`, a warning message is logged and no action is taken.
* @returns The `deleteLoader` method returns a boolean value indicating the success of the operation.
*/
public async deleteLoader(uniqueLoaderId: string, areYouSure: boolean = false) {
if (!areYouSure) {
console.warn('Delete embeddings from loader called without confirmation. No action taken.');
Expand All @@ -132,6 +188,17 @@ export class RAGApplication {
return deleteResult;
}

/**
* The function `batchLoadChunks` processes chunks of data in batches and formats them for insertion.
* @param {string} uniqueId - The `uniqueId` parameter is a string that represents the unique
* identifier of the loader being processed.
* @param incrementalGenerator - The `incrementalGenerator` parameter in the `batchLoadChunks`
* function is an asynchronous generator that yields `LoaderChunk` objects.
* @returns The `batchLoadChunks` function returns an object with two properties:
* 1. `newInserts`: The total number of new inserts made during the batch loading process.
* 2. `formattedChunks`: An array containing the formatted chunks that were processed during the
* batch loading process.
*/
private async batchLoadChunks(uniqueId: string, incrementalGenerator: AsyncGenerator<LoaderChunk, void, void>) {
let i = 0,
batchSize = 0,
Expand Down Expand Up @@ -162,6 +229,18 @@ export class RAGApplication {
return { newInserts, formattedChunks };
}

/**
* The function `batchLoadEmbeddings` asynchronously loads embeddings for formatted chunks and
* inserts them into a vector database.
* @param {string} loaderUniqueId - The `loaderUniqueId` parameter is a unique identifier for the
* loader that is used to load embeddings.
* @param {Chunk[]} formattedChunks - `formattedChunks` is an array of Chunk objects that contain
* page content, metadata, and other information needed for processing. The `batchLoadEmbeddings`
* function processes these chunks in batches to obtain embeddings for each chunk and then inserts
* them into a database for further use.
* @returns The function `batchLoadEmbeddings` returns the result of inserting the embed chunks
* into the vector database.
*/
private async batchLoadEmbeddings(loaderUniqueId: string, formattedChunks: Chunk[]) {
if (formattedChunks.length === 0) return 0;

Expand All @@ -179,10 +258,26 @@ export class RAGApplication {
return this.vectorDb.insertChunks(embedChunks);
}

/**
 * Reports how many vectors the configured vector database currently holds.
 * The call is delegated unchanged to `this.vectorDb.getVectorCount()`.
 * @returns A promise resolving to the vector count reported by the vector database.
 */
public async getEmbeddingsCount(): Promise<number> {
return this.vectorDb.getVectorCount();
}

/**
* The function `deleteAllEmbeddings` deletes all embeddings from the vector database if a
* confirmation is provided.
* @param {boolean} [areYouSure=false] - The `areYouSure` parameter is a boolean parameter that
* serves as a confirmation flag. It is used to ensure that the deletion of all embeddings is
* intentional and requires the caller to explicitly confirm by passing `true` as the value. If
* `areYouSure` is `false`, a warning message is logged.
* @returns The `deleteAllEmbeddings` function returns a boolean value indicating the result.
*/
public async deleteAllEmbeddings(areYouSure: boolean = false) {
if (!areYouSure) {
console.warn('Reset embeddings called without confirmation. No action taken.');
Expand All @@ -193,6 +288,17 @@ export class RAGApplication {
return true;
}

/**
* The function `getEmbeddings` retrieves embeddings for a query, performs similarity search,
* filters and sorts the results based on relevance score, and returns a subset of the top results.
* @param {string} cleanQuery - The `cleanQuery` parameter is a string that represents the query
* input after it has been cleaned or processed to remove any unnecessary characters, symbols, or
* noise. This clean query is then used to generate embeddings for similarity search.
* @returns The `getEmbeddings` function returns a filtered and sorted array of search results based
* on the similarity score of the query embedded in the cleanQuery string. The results are filtered
* based on a relevance cutoff value, sorted in descending order of score, and then sliced to return
* only the number of results specified by the `searchResultCount` property.
*/
public async getEmbeddings(cleanQuery: string) {
const queryEmbedded = await RAGEmbedding.getEmbedding().embedQuery(cleanQuery);
const unfilteredResultSet = await this.vectorDb.similaritySearch(queryEmbedded, this.searchResultCount + 10);
Expand All @@ -203,13 +309,34 @@ export class RAGApplication {
.slice(0, this.searchResultCount);
}

/**
 * Retrieves the context chunks matching a query without calling an LLM. The query is passed
 * through `cleanString`, looked up with `getEmbeddings`, and the matches are de-duplicated by
 * their `pageContent`.
 * @param {string} query - The raw query text to find context for.
 * @returns An array with one entry per distinct `pageContent`. When several matches share the
 * same content, the last of them is kept, at the position where that content first appeared.
 */
public async getContext(query: string) {
    const matches = await this.getEmbeddings(cleanString(query));

    type ContextChunk = (typeof matches)[number];
    const uniqueByContent = new Map<ContextChunk['pageContent'], ContextChunk>();
    for (const match of matches) {
        // Setting an existing key replaces its value but keeps its original insertion position.
        uniqueByContent.set(match.pageContent, match);
    }

    return Array.from(uniqueByContent.values());
}

/**
* This function takes a user query, retrieves relevant context, identifies unique sources, and
* returns the query result along with the list of sources.
* @param {string} userQuery - The `userQuery` parameter is a string that represents the query
* input provided by the user. It is used as input to retrieve context and ultimately generate a
* result based on the query.
* @param {string} [conversationId] - The `conversationId` parameter in the `query` method is an
* optional parameter that represents the unique identifier for a conversation. It allows you to
* track and associate the query with a specific conversation thread if needed. If provided, it can be
* used to maintain context or history related to the conversation.
* @returns The `query` method returns a Promise that resolves to an object with two properties:
* `result` and `sources`. The `result` property is a string representing the result of querying
* the LLM model with the provided query template, user query, context, and conversation history. The
* `sources` property is an array of strings representing unique sources used to generate the LLM response.
*/
public async query(
userQuery: string,
conversationId?: string,
Expand Down
4 changes: 4 additions & 0 deletions src/interfaces/base-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ export abstract class BaseLoader<
this.emit('incrementalChunkAvailable', incrementalGenerator);
}

/**
* This TypeScript function asynchronously processes chunks of data, cleans up the content,
* calculates a content hash, and yields the modified chunks.
*/
public async *getChunks(): AsyncGenerator<LoaderChunk<T>, void, void> {
const chunks = await this.getUnfilteredChunks();

Expand Down
16 changes: 16 additions & 0 deletions src/interfaces/base-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ export abstract class BaseModel {

public async init(): Promise<void> {}

/**
* The query function asynchronously processes user queries mixing references from a vector database
* and maintains the conversation history.
* @param {string} system - This is the system prompt passed to the LLM.
* @param {string} userQuery - The `userQuery` parameter in the `query` method represents the query
* or question inputted by the user that the system will process and provide a response to.
* @param {Chunk[]} supportingContext - The `supportingContext` parameter in the `query` method is
* an array of `Chunk` objects. Each `Chunk` object typically contains information or context
* relevant to the user query being processed. The `supportingContext` is used to provide
* additional RAG context to the system when running the query.
* @param {string} [conversationId=default] - The `conversationId` parameter in the `query` method
* is a unique identifier for a conversation. It is used to keep track of the conversation history
* and context for each conversation. If a conversation with the specified `conversationId` does
* not exist in the `conversationMap`, a new entry is created.
* @returns The `query` method returns a Promise that resolves to a string with the LLM response.
*/
public async query(
system: string,
userQuery: string,
Expand Down

0 comments on commit 0ca2330

Please sign in to comment.