Merge pull request #119 from llm-tools/monorepo
v0.1.3
adhityan authored Oct 6, 2024
2 parents c4d4781 + 0348134 commit 5d3b526
Showing 59 changed files with 691 additions and 113 deletions.
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"docwriter.style": "JSDoc"
}
19 changes: 19 additions & 0 deletions core/embedjs-interfaces/CHANGELOG.md
@@ -1,3 +1,22 @@
## 0.1.3 (2024-10-06)


### 🚀 Features

- readded local-path and url loaders ([303133c](https://github.com/llm-tools/embedJs/commit/303133c))


### 🩹 Fixes

- exclude examples from release process ([1382185](https://github.com/llm-tools/embedJs/commit/1382185))

- downgrade esbuild version to match nx requirements ([183308f](https://github.com/llm-tools/embedJs/commit/183308f))


### ❤️ Thank You

- Adhityan K V @adhityan

## 0.1.2 (2024-10-06)


2 changes: 1 addition & 1 deletion core/embedjs-interfaces/package.json
@@ -1,6 +1,6 @@
{
"name": "@llm-tools/embedjs-interfaces",
"version": "0.1.2",
"version": "0.1.3",
"description": "Interfaces for extending the embedjs ecosystem",
"dependencies": {
"debug": "^4.3.7",
3 changes: 2 additions & 1 deletion core/embedjs-interfaces/src/interfaces/base-model.ts
@@ -28,13 +28,14 @@ export abstract class BaseModel {
return this._temperature ?? BaseModel.defaultTemperature;
}

// eslint-disable-next-line @typescript-eslint/no-empty-function
public async init(): Promise<void> {}

public async query(
system: string,
userQuery: string,
supportingContext: Chunk[],
conversationId: string = 'default',
conversationId = 'default',
): Promise<QueryResponse> {
const conversation = await BaseModel.conversations.getConversation(conversationId);
this.baseDebug(`${conversation.entries.length} history entries found for conversationId '${conversationId}'`);
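This hunk makes two lint-driven touches: an `eslint-disable` marker for the intentionally empty `init()` body, and the removal of the explicit `string` annotation on `conversationId`, whose type TypeScript already infers from the default value (the kind of redundancy flagged by a rule such as `@typescript-eslint/no-inferrable-types`). A minimal sketch of that inference, using hypothetical names:

```ts
// TypeScript infers `conversationId: string` from its default value,
// so an explicit `: string` annotation would be redundant.
async function query(userQuery: string, conversationId = 'default'): Promise<string> {
    return `[${conversationId}] ${userQuery}`;
}

// Both calls type-check; the second falls back to 'default'.
await query('What is RAG?', 'session-42');
await query('What is RAG?');
```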
19 changes: 19 additions & 0 deletions core/embedjs-utils/CHANGELOG.md
@@ -1,3 +1,22 @@
## 0.1.3 (2024-10-06)


### 🚀 Features

- readded local-path and url loaders ([303133c](https://github.com/llm-tools/embedJs/commit/303133c))


### 🩹 Fixes

- exclude examples from release process ([1382185](https://github.com/llm-tools/embedJs/commit/1382185))

- downgrade esbuild version to match nx requirements ([183308f](https://github.com/llm-tools/embedJs/commit/183308f))


### ❤️ Thank You

- Adhityan K V @adhityan

## 0.1.2 (2024-10-06)


4 changes: 2 additions & 2 deletions core/embedjs-utils/package.json
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-utils",
"version": "0.1.2",
"version": "0.1.3",
"description": "Useful util functions when extending the embedjs ecosystem",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.2"
"@llm-tools/embedjs-interfaces": "0.1.3"
},
"type": "module",
"main": "./src/index.js",
19 changes: 19 additions & 0 deletions core/embedjs/CHANGELOG.md
@@ -1,3 +1,22 @@
## 0.1.3 (2024-10-06)


### 🚀 Features

- readded local-path and url loaders ([303133c](https://github.com/llm-tools/embedJs/commit/303133c))


### 🩹 Fixes

- exclude examples from release process ([1382185](https://github.com/llm-tools/embedJs/commit/1382185))

- downgrade esbuild version to match nx requirements ([183308f](https://github.com/llm-tools/embedJs/commit/183308f))


### ❤️ Thank You

- Adhityan K V @adhityan

## 0.1.2 (2024-10-06)


6 changes: 3 additions & 3 deletions core/embedjs/package.json
@@ -1,11 +1,11 @@
{
"type": "module",
"name": "@llm-tools/embedjs",
"version": "0.1.2",
"version": "0.1.3",
"description": "A NodeJS RAG framework to easily work with LLMs and custom datasets",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.2",
"@llm-tools/embedjs-utils": "0.1.2",
"@llm-tools/embedjs-interfaces": "0.1.3",
"@llm-tools/embedjs-utils": "0.1.3",
"axios": "^1.7.7",
"debug": "^4.3.7",
"langchain": "^0.3.2",
37 changes: 29 additions & 8 deletions core/embedjs/src/core/rag-application-builder.ts
@@ -6,19 +6,20 @@ import {
BaseEmbeddings,
BaseLoader,
BaseModel,
SIMPLE_MODELS,
} from '@llm-tools/embedjs-interfaces';

export class RAGApplicationBuilder {
private temperature: number;
private queryTemplate: string;
private model: BaseModel | SIMPLE_MODELS | null;
private loaders: BaseLoader[];
private vectorDb: BaseDb;
private cache?: BaseCache;
private model: BaseModel | null;
private conversations: BaseConversation;
private queryTemplate: string;
private searchResultCount: number;
private embeddingModel: BaseEmbeddings;
private embeddingRelevanceCutOff: number;
private loaders: BaseLoader[];
private vectorDb: BaseDb;
private conversations: BaseConversation;

constructor() {
this.loaders = [];
@@ -33,9 +34,13 @@ export class RAGApplicationBuilder {
this.embeddingRelevanceCutOff = 0;
}

/**
* The `build` function creates a new `RAGApplication` entity and initializes it asynchronously based on provided parameters.
* @returns An instance of the `RAGApplication` class after it has been initialized asynchronously.
*/
async build() {
const entity = new RAGApplication(this);
await entity.init();
await entity.init(this);
return entity;
}

@@ -44,11 +49,23 @@
return this;
}

/**
* The setSearchResultCount function sets the search result count
* @param {number} searchResultCount - The `searchResultCount` parameter
* represents the count of search results picked up from the vector store per query.
* @returns The `this` object is being returned, which allows for method chaining.
*/
setSearchResultCount(searchResultCount: number) {
this.searchResultCount = searchResultCount;
return this;
}

/**
* The function setVectorDb sets a BaseDb object
* @param {BaseDb} vectorDb - The `vectorDb` parameter is an instance of the `BaseDb` class, which
* is used to store vectors in a database.
* @returns The `this` object is being returned, which allows for method chaining.
*/
setVectorDb(vectorDb: BaseDb) {
this.vectorDb = vectorDb;
return this;
@@ -83,9 +100,13 @@
return this;
}

setModel(model: 'NO_MODEL' | BaseModel) {
setModel(model: 'NO_MODEL' | SIMPLE_MODELS | BaseModel) {
if (typeof model === 'object') this.model = model;
else this.model = null;
else {
if (model === 'NO_MODEL') this.model = null;
else this.model = model;
}

return this;
}

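With `setModel` now accepting `SIMPLE_MODELS` values, the builder can defer model construction until `RAGApplication.init()` runs. A hypothetical end-to-end sketch of the fluent API, assuming the usual top-level exports and a `setEmbeddingModel` setter matching the `embeddingModel` field above; the adapter imports are placeholders:

```ts
import { RAGApplicationBuilder } from '@llm-tools/embedjs';
import { SIMPLE_MODELS } from '@llm-tools/embedjs-interfaces';
// Placeholder adapters: substitute the concrete vector-db and
// embedding packages installed in your project.
import { MyVectorDb } from './my-vector-db.js';
import { MyEmbeddings } from './my-embeddings.js';

// `build()` now hands the builder to `RAGApplication.init(this)`, which
// resolves the SIMPLE_MODELS value into a concrete model at startup.
const app = await new RAGApplicationBuilder()
    .setModel(SIMPLE_MODELS.OPENAI_GPT4_O)
    .setVectorDb(new MyVectorDb())
    .setEmbeddingModel(new MyEmbeddings())
    .setSearchResultCount(7)
    .build();
```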
108 changes: 68 additions & 40 deletions core/embedjs/src/core/rag-application.ts
@@ -14,75 +14,53 @@ import {
InsertChunkData,
LoaderChunk,
QueryResponse,
SIMPLE_MODELS,
} from '@llm-tools/embedjs-interfaces';
import { cleanString, getUnique } from '@llm-tools/embedjs-utils';

export class RAGApplication {
private readonly debug = createDebugMessages('embedjs:core');
private readonly queryTemplate: string;
private readonly embeddingRelevanceCutOff: number;
private readonly searchResultCount: number;
private readonly queryTemplate: string;
private readonly cache?: BaseCache;
private readonly vectorDb: BaseDb;
private readonly model: BaseModel;
private readonly embeddingRelevanceCutOff: number;

private readonly rawLoaders: BaseLoader[];
private loaders: BaseLoader[];
private model: BaseModel;

constructor(llmBuilder: RAGApplicationBuilder) {
this.cache = llmBuilder.getCache();
BaseLoader.setCache(this.cache);

this.model = llmBuilder.getModel();
if (!this.model) this.debug('No base model set; query function unavailable!');
else {
BaseModel.setDefaultTemperature(llmBuilder.getTemperature());
BaseModel.setConversations(llmBuilder.getConversationsEngine() || new InMemoryConversation());
}

this.queryTemplate = cleanString(llmBuilder.getQueryTemplate());
this.debug(`Using system query template - "${this.queryTemplate}"`);

this.vectorDb = llmBuilder.getVectorDb();
if (!this.vectorDb) throw new SyntaxError('VectorDb not set');

this.rawLoaders = llmBuilder.getLoaders();
this.searchResultCount = llmBuilder.getSearchResultCount();
this.embeddingRelevanceCutOff = llmBuilder.getEmbeddingRelevanceCutOff();

RAGEmbedding.init(llmBuilder.getEmbeddingModel());
}

/**
* The function `embedChunks` embeds the content of chunks by invoking the planned embedding model.
* @param {Pick<Chunk, 'pageContent'>[]} chunks - The `chunks` parameter is an array of objects
* that have a property `pageContent` which contains text content for each chunk.
* @returns The `embedChunks` function is returning the embedded vectors for the chunks.
*/
private async embedChunks(chunks: Pick<Chunk, 'pageContent'>[]) {
const texts = chunks.map(({ pageContent }) => pageContent);
return RAGEmbedding.getEmbedding().embedDocuments(texts);
}

/**
* The function `getChunkUniqueId` generates a unique identifier by combining a loader unique ID and
* an increment ID.
* @param {string} loaderUniqueId - A unique identifier for the loader.
* @param {number} incrementId - The `incrementId` parameter is a number that represents the
* increment value used to generate a unique chunk identifier.
* @returns The function `getChunkUniqueId` returns a string that combines the `loaderUniqueId` and
* `incrementId`.
* The function initializes the various components of the application from the provided builder
* configuration. This is an internal method and does not need to be invoked manually.
* @param {RAGApplicationBuilder} llmBuilder - An instance of the `RAGApplicationBuilder` class
* holding the model, loaders, vector database and other settings from which the application
* is initialized.
*/
private getChunkUniqueId(loaderUniqueId: string, incrementId: number) {
return `${loaderUniqueId}_${incrementId}`;
}
public async init(llmBuilder: RAGApplicationBuilder) {
this.model = await this.getModel(llmBuilder.getModel());
if (!this.model) this.debug('No base model set; query function unavailable!');
else {
BaseModel.setDefaultTemperature(llmBuilder.getTemperature());
BaseModel.setConversations(llmBuilder.getConversationsEngine() || new InMemoryConversation());
}

/**
* This async function initializes various components such as loaders, model, vector database,
* cache, and pre-loaders.
*/
public async init() {
this.loaders = this.rawLoaders;
this.loaders = llmBuilder.getLoaders();

if (this.model) {
await this.model.init();
@@ -104,6 +82,56 @@
this.debug('Initialized pre-loaders');
}

/**
* The function getModel resolves the configured model into a concrete BaseModel instance.
* @param {BaseModel | SIMPLE_MODELS | null} model - Either a concrete `BaseModel` instance, a
* value from the `SIMPLE_MODELS` enum, or `null`.
* @returns The `getModel` function returns a Promise that resolves to a `BaseModel` object. If the
* `model` parameter is an object, it returns the object itself. If the `model` parameter is
* `null`, it returns `null`. If the `model` parameter is a specific value from the `SIMPLE_MODELS`
* enum, it creates a new `BaseModel` object based on the model name.
*/
private async getModel(model: BaseModel | SIMPLE_MODELS | null): Promise<BaseModel> {
if (typeof model === 'object') return model;
else if (model === null) return null;
else {
const { OpenAi } = await import('@llm-tools/embedjs-openai').catch(() => {
throw new Error('Package `@llm-tools/embedjs-openai` needs to be installed to use OpenAI models');
});
this.debug('Dynamically imported OpenAi');

if (model === SIMPLE_MODELS.OPENAI_GPT4_O) return new OpenAi({ modelName: 'gpt-4o' });
else if (model === SIMPLE_MODELS['OPENAI_GPT4_TURBO']) return new OpenAi({ modelName: 'gpt-4-turbo' });
else if (model === SIMPLE_MODELS['OPENAI_GPT3.5_TURBO']) return new OpenAi({ modelName: 'gpt-3.5-turbo' });
else throw new Error('Invalid model name');
}
}

/**
* The function `embedChunks` embeds the content of chunks by invoking the planned embedding model.
* @param {Pick<Chunk, 'pageContent'>[]} chunks - The `chunks` parameter is an array of objects
* that have a property `pageContent` which contains text content for each chunk.
* @returns The `embedChunks` function is returning the embedded vectors for the chunks.
*/
private async embedChunks(chunks: Pick<Chunk, 'pageContent'>[]) {
const texts = chunks.map(({ pageContent }) => pageContent);
return RAGEmbedding.getEmbedding().embedDocuments(texts);
}

/**
* The function `getChunkUniqueId` generates a unique identifier by combining a loader unique ID and
* an increment ID.
* @param {string} loaderUniqueId - A unique identifier for the loader.
* @param {number} incrementId - The `incrementId` parameter is a number that represents the
* increment value used to generate a unique chunk identifier.
* @returns The function `getChunkUniqueId` returns a string that combines the `loaderUniqueId` and
* `incrementId`.
*/
private getChunkUniqueId(loaderUniqueId: string, incrementId: number) {
return `${loaderUniqueId}_${incrementId}`;
}

/**
* The function `addLoader` asynchronously initializes a loader using the provided parameters and adds
* it to the system.
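The dynamic `import()` inside `getModel` keeps `@llm-tools/embedjs-openai` an optional dependency: it is loaded only when a `SIMPLE_MODELS` value is actually used, and a missing install surfaces as a descriptive error rather than a hard failure at startup. The if/else chain mapping enum values to model names could equally be written as a lookup table; a sketch, assuming `SIMPLE_MODELS` is a plain TypeScript enum:

```ts
import { SIMPLE_MODELS } from '@llm-tools/embedjs-interfaces';

// Equivalent lookup-table form of the if/else chain in getModel().
// Keying a Record on the enum keeps the mapping exhaustive: adding a
// new SIMPLE_MODELS member without a model name is a compile error.
const MODEL_NAMES: Record<SIMPLE_MODELS, string> = {
    [SIMPLE_MODELS.OPENAI_GPT4_O]: 'gpt-4o',
    [SIMPLE_MODELS.OPENAI_GPT4_TURBO]: 'gpt-4-turbo',
    [SIMPLE_MODELS['OPENAI_GPT3.5_TURBO']]: 'gpt-3.5-turbo',
};
```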